Commit e1a67b69 authored by Chao Liu
Browse files

refactor

parent f7be86b9
......@@ -51,7 +51,7 @@ template <index_t GridSize,
index_t WeiBlockCopyDstDataPerWrite_K>
struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
{
#if 0
#if 1
__device__ void Run(const Float* const __restrict__ p_in_global,
const Float* const __restrict__ p_wei_global,
Float* const __restrict__ p_out_global) const
......
......@@ -187,10 +187,14 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
// weight tensor
// tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc =
#if 0
transform_tensor_descriptor(wei_k_c_y_x_global_desc,
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}));
#else // hack
make_native_tensor_descriptor_packed(Sequence<K, C * Y * X>{});
#endif
// tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment
......
......@@ -47,7 +47,7 @@ struct NativeTensorCoordinate
// mIndex is updated here, but some (or all) of its entries may never be used
mIndex += idx_diff;
mOffset += tensor_desc_type::CalculateOffset(idx_diff);
mOffset += tensor_desc_type::CalculateOffsetDiff(idx_diff);
return *this;
}
......@@ -57,7 +57,7 @@ struct NativeTensorCoordinate
// mIndex is updated here, but some (or all) of its entries may never be used
mIndex -= idx_diff;
mOffset -= tensor_desc_type::CalculateOffset(idx_diff);
mOffset -= tensor_desc_type::CalculateOffsetDiff(idx_diff);
return *this;
}
......
......@@ -684,12 +684,10 @@ template <index_t BlockSize,
struct BlockwiseGenericTensorSliceCopy_v4
{
static constexpr index_t nDim = SrcDesc::GetNumOfDimension();
using Index = MultiIndex<nDim>;
using SrcCoord = typename TensorCoordinate_v2<SrcDesc>::type;
using DstCoord = typename TensorCoordinate_v2<DstDesc>::type;
__device__ constexpr BlockwiseGenericTensorSliceCopy_v4(SrcCoord src_block_slice_origin,
DstCoord dst_block_slice_origin)
__device__ constexpr BlockwiseGenericTensorSliceCopy_v4(const Index& src_block_slice_origin,
const Index& dst_block_slice_origin)
{
static_assert(nDim == SrcDesc::GetNumOfDimension() &&
nDim == DstDesc::GetNumOfDimension() && nDim == SliceLengths::Size() &&
......
......@@ -966,8 +966,8 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
using SrcCoord = typename TensorCoordinate_v2<SrcDesc>::type;
using DstCoord = typename TensorCoordinate_v2<DstDesc>::type;
__device__ constexpr ThreadwiseGenericTensorSliceCopy_v4r2(SrcCoord src_slice_origin,
DstCoord dst_slice_origin)
__device__ constexpr ThreadwiseGenericTensorSliceCopy_v4r2(const Index& src_slice_origin,
const Index& dst_slice_origin)
: mSrcSliceOrigin(src_slice_origin), mDstSliceOrigin(dst_slice_origin)
{
static_assert(nDim == SrcDesc::GetNumOfDimension() &&
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment