Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
1566b317
"...text-generation-inference.git" did not exist on "4f4857a4ac4d09483f72465e5adcd29f38b03b16"
Commit
1566b317
authored
Jun 13, 2019
by
Chao Liu
Browse files
reorginzed files
parent
c82b833d
Changes
64
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
133 additions
and
118 deletions
+133
-118
CMakeLists.txt
CMakeLists.txt
+14
-3
composable_kernel/include/gridwise_convolution_kernel_wrapper.hpp
...le_kernel/include/gridwise_convolution_kernel_wrapper.hpp
+0
-0
composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
...gorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
+6
-6
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
...ridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
+8
-8
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
...ridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
+9
-9
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
...ridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
+8
-8
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
...n_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
+8
-8
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
...ridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
+8
-8
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
...n_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
+8
-8
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
.../gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
+6
-6
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
...ion_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
+7
-7
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
.../gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
+6
-6
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
...ion_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
+6
-6
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp
.../gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp
+7
-7
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp
...ion_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp
+14
-10
composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
...idwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
+7
-7
composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
...ise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
+7
-7
composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp
...l/include/tensor_description/ConstantMatrixDescriptor.hpp
+1
-1
composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp
...ude/tensor_description/ConstantMergedTensorDescriptor.hpp
+2
-2
composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp
...l/include/tensor_description/ConstantTensorDescriptor.hpp
+1
-1
No files found.
CMakeLists.txt
View file @
1566b317
...
@@ -46,8 +46,19 @@ endif()
...
@@ -46,8 +46,19 @@ endif()
#
#
include_directories
(
BEFORE
include_directories
(
BEFORE
include
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include
${
PROJECT_BINARY_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/utility
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/tensor_description
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/tensor_operation
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/kernel_algorithm
${
PROJECT_SOURCE_DIR
}
/driver/include
${
PROJECT_BINARY_DIR
}
/composable_kernel/include/utility
)
)
add_subdirectory
(
src
)
if
(
DEVICE_BACKEND STREQUAL
"AMD"
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/utility/config_amd.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/composable_kernel/include/utility/config.hpp"
)
elseif
(
DEVICE_BACKEND STREQUAL
"NVIDIA"
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/utility/config_nvidia.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/composable_kernel/include/utility/config.hpp"
)
endif
()
add_subdirectory
(
driver
)
add_subdirectory
(
driver
)
include/gridwise_convolution_kernel_wrapper.hpp
→
composable_kernel/
include/gridwise_convolution_kernel_wrapper.hpp
View file @
1566b317
File moved
include/
composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_direct_convolution.hpp"
#include "threadwise_direct_convolution.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_batched_gemm.hpp"
#include "blockwise_batched_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_3d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_batched_gemm.hpp"
#include "blockwise_batched_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_batched_gemm.hpp"
#include "blockwise_batched_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_batched_gemm.hpp"
#include "blockwise_batched_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_tensor_slice_copy.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_generic_tensor_op.hpp"
#include "threadwise_generic_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_batched_gemm.hpp"
#include "blockwise_batched_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_tensor_slice_copy.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_generic_tensor_op.hpp"
#include "threadwise_generic_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_batched_gemm.hpp"
#include "blockwise_batched_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMergedTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMergedTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMergedTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_generic_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp
View file @
1566b317
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "composable_kernel/utility/common.hpp"
#include "common_header.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#ifndef CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM
#define CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM 1
#endif
namespace
ck
{
namespace
ck
{
...
@@ -233,10 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -233,10 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer
// choose GEMM implementation here
// choose GEMM implementation here
const
auto
run_blockwise_gemm
=
[
&
](
auto
...
Xs
)
{
const
auto
run_blockwise_gemm
=
[
&
](
auto
...
Xs
)
{
#if 1
#if CK_USE_AMD_INLINE_ASM && CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM
return
blockwise_gemm
.
Run
(
Xs
...);
#else
return
blockwise_gemm
.
Run_asm
(
Xs
...);
return
blockwise_gemm
.
Run_asm
(
Xs
...);
#else
return
blockwise_gemm
.
Run
(
Xs
...);
#endif
#endif
};
};
...
...
include/
composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp
View file @
1566b317
#pragma once
#pragma once
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_direct_convolution.hpp"
#include "blockwise_direct_convolution.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_direct_convolution.hpp"
#include "threadwise_direct_convolution.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
→
composable_kernel/
include/
kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp
View file @
1566b317
#pragma once
#pragma once
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "
composable_kernel/tensor_description/
ConstantMatrixDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_4d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_2d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
threadwise_4d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "
composable_kernel/tensor_operation/
blockwise_gemm.hpp"
#include "blockwise_gemm.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp
→
composable_kernel/
include/
tensor_description/ConstantMatrixDescriptor.hpp
View file @
1566b317
#ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp
→
composable_kernel/
include/
tensor_description/ConstantMergedTensorDescriptor.hpp
View file @
1566b317
#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
#include "
composable_kernel/tensor_description/
ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/
composable_kernel/tensor_description/ConstantTensorDescriptor.hpp
→
composable_kernel/
include/
tensor_description/ConstantTensorDescriptor.hpp
View file @
1566b317
#ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#include "com
posable_kernel/utility/common
.hpp"
#include "com
mon_header
.hpp"
namespace
ck
{
namespace
ck
{
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment