"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "4cf69087c4dbc260ef9016d1d80ce855c018404c"
Commit 5494423f authored by Jing Zhang
Browse files

add vector store

parent acdafc67
......@@ -140,7 +140,18 @@ struct ThreadwiseGenericTensorSliceCopy_v5
__device__ static void
run(float* p_dst, const float src_data, const DstCoord dst_coord_begin)
{
store_data<float>(src_data, p_dst, dst_coord_begin.GetOffset());
store_data(src_data, p_dst, dst_coord_begin.GetOffset());
}
};
// Explicit specialization of vector_data_store for the <float, 2> case:
// run() takes a float2_t source value and a float destination pointer.
template <>
struct vector_data_store<float, 2>
{
    // Hands src_data to store_data, targeting p_dst at the offset reported by
    // dst_coord_begin.GetOffset().
    // NOTE(review): presumably the resulting address must be suitably aligned
    // for a two-float store -- confirm against store_data's implementation.
    template <typename DstCoord>
    __device__ static void
    run(float* p_dst, const float2_t src_data, const DstCoord dst_coord_begin)
    {
        const auto dst_offset = dst_coord_begin.GetOffset();
        store_data(src_data, p_dst, dst_offset);
    }
};
......@@ -185,7 +196,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5
constexpr auto dst_data_per_access = Number<DstDataPerWrite>{};
static_assert(DstDataPerWrite == 1, "");
static_assert(DstDataPerWrite == 1 || DstDataPerWrite == 2, "");
constexpr auto long_vector_size = dst_data_per_access;
......
......@@ -184,7 +184,7 @@ void device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
using GemmBBlockCopyThreadClusterLengths_GemmK_GemmN = Sequence<8, 32>;
constexpr index_t GemmBBlockCopySrcDataPerRead_GemmN = 4;
constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmN = 1;
constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmN = 2;
constexpr index_t GemmCThreadCopyDstDataPerWrite_GemmN1 = 4;
#elif 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment