Commit c9a8e558 authored by Chao Liu
Browse files

adding tensor_view

parent 8669e242
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW #define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp" #include "blockwise_3d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp" #include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp" #include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp" #include "threadwise_tensor_slice_copy.hpp"
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp" #include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp" #include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp" #include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp" #include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp" #include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp" #include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp" #include "threadwise_generic_tensor_slice_copy.hpp"
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R1_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp" #include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp" #include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp" #include "threadwise_generic_tensor_slice_copy.hpp"
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R2_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R2_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp" #include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp" #include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp" #include "threadwise_generic_tensor_slice_copy.hpp"
...@@ -181,12 +181,6 @@ struct GridwiseConvolutionImplicitGemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer ...@@ -181,12 +181,6 @@ struct GridwiseConvolutionImplicitGemm_v4r2_nchw_kcyx_nkhw_lds_double_buffer
InBlockCopyDataPerAccess_W2>({0, 0, 0, 0, b_block_data_on_global, 0, 0, 0}, InBlockCopyDataPerAccess_W2>({0, 0, 0, 0, b_block_data_on_global, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0}); {0, 0, 0, 0, 0, 0, 0, 0});
#if 0
{
printf("id (%d %d), in offset: %d %d\n", get_block_1d_id(), get_thread_local_1d_id(), blockwise_in_copy.mThreadSrcOffset, blockwise_in_copy.mThreadDstOffset);
}
#endif
// weight tensor // weight tensor
// tensor descriptor in device memory, src of blockwise copy // tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc = constexpr auto wei_e_k_global_desc =
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R3_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R3_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp" #include "constant_merged_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp" #include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp" #include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp" #include "threadwise_generic_tensor_slice_copy.hpp"
...@@ -296,7 +296,16 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer ...@@ -296,7 +296,16 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
// LDS double buffer: preload data into LDS // LDS double buffer: preload data into LDS
{ {
#if 0
blockwise_in_copy.Run(p_in_global, p_in_block_double); blockwise_in_copy.Run(p_in_global, p_in_block_double);
#endif
#if 1
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
printf("blockwise_wei_copy.Run \n");
}
#endif
blockwise_wei_copy.Run(p_wei_global, p_wei_block_double); blockwise_wei_copy.Run(p_wei_global, p_wei_block_double);
} }
...@@ -327,12 +336,36 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer ...@@ -327,12 +336,36 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
__syncthreads(); __syncthreads();
// LDS double buffer: load next data from device mem #if 0
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
for(index_t i = 0; i < wei_e_k_block_desc.GetLengths()[0]; ++i)
{
for(index_t j = 0; j < wei_e_k_block_desc.GetLengths()[1]; ++j)
{
printf("%d %d %f, ", i, j, p_wei_block_now[wei_e_k_block_desc.GetOffsetFromMultiIndex(i, j)]);
}
}
printf("\n");
}
#endif
// LDS double buffer: load next data from device mem
#if 0
blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard); blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard);
#endif
#if 1
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
printf("blockwise_wei_copy.RunLoad \n");
}
#endif
blockwise_wei_copy.RunLoadRegisterClipboard(p_wei_block_on_global, blockwise_wei_copy.RunLoadRegisterClipboard(p_wei_block_on_global,
p_wei_register_clipboard); p_wei_register_clipboard);
#if 1 #if 0
if(get_block_1d_id() == 0) if(get_block_1d_id() == 0)
{ {
printf("tid (%d %d), %f %f %f %f\n", printf("tid (%d %d), %f %f %f %f\n",
...@@ -348,9 +381,18 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer ...@@ -348,9 +381,18 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
// LDS double buffer: GEMM on current data // LDS double buffer: GEMM on current data
blockwise_gemm.Run(p_wei_block_now, p_in_block_now, p_out_thread); blockwise_gemm.Run(p_wei_block_now, p_in_block_now, p_out_thread);
// LDS double buffer: store next data to LDS // LDS double buffer: store next data to LDS
#if 0
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard, blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
p_in_block_next); p_in_block_next);
#endif
#if 1
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
printf("blockwise_wei_copy.RunStore \n");
}
#endif
blockwise_wei_copy.RunStoreRegisterClipboard(p_wei_register_clipboard, blockwise_wei_copy.RunStoreRegisterClipboard(p_wei_register_clipboard,
p_wei_block_next); p_wei_block_next);
} }
...@@ -367,17 +409,33 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer ...@@ -367,17 +409,33 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
__syncthreads(); __syncthreads();
// LDS double buffer: load next data from device mem // LDS double buffer: load next data from device mem
#if 0
blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard); blockwise_in_copy.RunLoadRegisterClipboard(p_in_global, p_in_register_clipboard);
#endif
#if 1
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
printf("blockwise_wei_copy.RunLoad \n");
}
#endif
blockwise_wei_copy.RunLoadRegisterClipboard(p_wei_block_on_global, blockwise_wei_copy.RunLoadRegisterClipboard(p_wei_block_on_global,
p_wei_register_clipboard); p_wei_register_clipboard);
// LDS double buffer: GEMM on current data // LDS double buffer: GEMM on current data
blockwise_gemm.Run(p_wei_block_double, p_in_block_double, p_out_thread); blockwise_gemm.Run(p_wei_block_double, p_in_block_double, p_out_thread);
// LDS double buffer: store next data to LDS // LDS double buffer: store next data to LDS
#if 0
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard, blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
p_in_block_double + in_block_space); p_in_block_double + in_block_space);
#endif
#if 1
if(get_block_1d_id() == 0 && get_thread_local_1d_id() == 0)
{
printf("blockwise_wei_copy.RunStore \n");
}
#endif
blockwise_wei_copy.RunStoreRegisterClipboard(p_wei_register_clipboard, blockwise_wei_copy.RunStoreRegisterClipboard(p_wei_register_clipboard,
p_wei_block_double + wei_block_space); p_wei_block_double + wei_block_space);
...@@ -444,6 +502,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer ...@@ -444,6 +502,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
out_k_n1_ho1_wo1_b_n2_ho2_wo2_global_merged_desc.GetOffsetFromMultiIndex( out_k_n1_ho1_wo1_b_n2_ho2_wo2_global_merged_desc.GetOffsetFromMultiIndex(
k_thread_data_on_global, 0, 0, 0, b_thread_data_on_global, 0, 0, 0); k_thread_data_on_global, 0, 0, 0, b_thread_data_on_global, 0, 0, 0);
#if 0
threadwise_generic_tensor_slice_copy_v1( threadwise_generic_tensor_slice_copy_v1(
out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc, out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc,
p_out_thread, p_out_thread,
...@@ -454,6 +513,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer ...@@ -454,6 +513,7 @@ struct GridwiseConvolutionImplicitGemm_v4r3_nchw_kcyx_nkhw_lds_double_buffer
out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc.GetLengths(), out_n0_n1_n2_k0_k1_k2_ho0_ho1_ho2_wo0_wo1_wo2_thread_desc.GetLengths(),
arithmetic_sequence_gen<0, 12, 1>::type{}, arithmetic_sequence_gen<0, 12, 1>::type{},
Number<1>{}); Number<1>{});
#endif
} }
} }
}; };
......
#pragma once #pragma once
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "blockwise_direct_convolution.hpp" #include "blockwise_direct_convolution.hpp"
......
#pragma once #pragma once
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
#include "ConstantMatrixDescriptor.hpp" #include "constant_matrix_descriptor.hpp"
#include "blockwise_4d_tensor_op.hpp" #include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp" #include "blockwise_2d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp" #include "threadwise_4d_tensor_op.hpp"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP #define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
namespace ck { namespace ck {
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP #define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#include "common_header.hpp" #include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp" #include "constant_tensor_descriptor.hpp"
namespace ck { namespace ck {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment