"vscode:/vscode.git/clone" did not exist on "46e22b8aea6a78ce46de155f07e26b160be72de2"
Commit 126cae0c authored by Chao Liu
Browse files

bug fix

parent e1a67b69
...@@ -185,6 +185,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded ...@@ -185,6 +185,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
InBlockCopyDstDataPerWrite_N2>( InBlockCopyDstDataPerWrite_N2>(
{0, 0, b_block_data_on_global, 0}, {0, 0, 0, 0}); {0, 0, b_block_data_on_global, 0}, {0, 0, 0, 0});
#if 0
// weight tensor // weight tensor
// tensor descriptor in device memory, src of blockwise copy // tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc = constexpr auto wei_e_k_global_desc =
...@@ -192,6 +193,13 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded ...@@ -192,6 +193,13 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}), make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}), make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{})); make_tuple(Sequence<0>{}, Sequence<1>{}));
#else // hack
constexpr auto wei_e_k_global_desc_old =
WeiGlobalDesc::Unfold(I1, I3).ReorderGivenNew2Old(Sequence<1, 0>{});
constexpr auto wei_e_k_global_desc = make_native_tensor_descriptor(
wei_e_k_global_desc_old.GetLengths(), wei_e_k_global_desc_old.GetStrides());
#endif
// tensor descriptor in LDS, dst of blockwise copy // tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment // be careful of LDS alignment
......
...@@ -184,16 +184,20 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf ...@@ -184,16 +184,20 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
InBlockCopyDstDataPerWrite_N2>( InBlockCopyDstDataPerWrite_N2>(
{0, 0, b_block_data_on_global, 0}, {0, 0, 0, 0}); {0, 0, b_block_data_on_global, 0}, {0, 0, 0, 0});
#if 0
// weight tensor // weight tensor
// tensor descriptor in device memory, src of blockwise copy // tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc = constexpr auto wei_e_k_global_desc =
#if 0
transform_tensor_descriptor(wei_k_c_y_x_global_desc, transform_tensor_descriptor(wei_k_c_y_x_global_desc,
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}), make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}), make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{})); make_tuple(Sequence<0>{}, Sequence<1>{}));
#else // hack #else // hack
make_native_tensor_descriptor_packed(Sequence<K, C * Y * X>{}); constexpr auto wei_e_k_global_desc_old =
WeiGlobalDesc::Unfold(I1, I3).ReorderGivenNew2Old(Sequence<1, 0>{});
constexpr auto wei_e_k_global_desc = make_native_tensor_descriptor(
wei_e_k_global_desc_old.GetLengths(), wei_e_k_global_desc_old.GetStrides());
#endif #endif
// tensor descriptor in LDS, dst of blockwise copy // tensor descriptor in LDS, dst of blockwise copy
......
...@@ -85,12 +85,6 @@ struct NativeTensorDescriptor ...@@ -85,12 +85,6 @@ struct NativeTensorDescriptor
return offset; return offset;
} }
// TODO: remove this
__host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(const Index& idx)
{
return CalculateOffset(idx);
}
__host__ __device__ static constexpr index_t CalculateOffsetDiff(const Index& idx_diff) __host__ __device__ static constexpr index_t CalculateOffsetDiff(const Index& idx_diff)
{ {
index_t offset_diff = 0; index_t offset_diff = 0;
...@@ -227,13 +221,6 @@ struct TransformedTensorDescriptor ...@@ -227,13 +221,6 @@ struct TransformedTensorDescriptor
return LowTensorDescriptor{}; return LowTensorDescriptor{};
} }
#if 0
__host__ __device__ static constexpr auto GetLowerLengths()
{
return GetLowerTensorDescriptor().GetLengths();
}
#endif
struct lambda_GetUpperLengths struct lambda_GetUpperLengths
{ {
template <typename Transform> template <typename Transform>
...@@ -359,12 +346,6 @@ struct TransformedTensorDescriptor ...@@ -359,12 +346,6 @@ struct TransformedTensorDescriptor
return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up)); return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
} }
// TODO: remove this
__host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(const UpperIndex& idx_up)
{
return CalculateOffset(idx_up);
}
#if 0 #if 0
template <index_t IDim> template <index_t IDim>
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>) __host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
......
...@@ -49,7 +49,7 @@ struct GeneratorTensor_3 ...@@ -49,7 +49,7 @@ struct GeneratorTensor_3
{ {
std::array<index_t, sizeof...(Is)> dims = {{static_cast<index_t>(is)...}}; std::array<index_t, sizeof...(Is)> dims = {{static_cast<index_t>(is)...}};
auto f_acc = [](auto a, auto b) { return 100 * a + b; }; auto f_acc = [](auto a, auto b) { return 10 * a + b; };
return std::accumulate(dims.begin(), dims.end(), index_t(0), f_acc); return std::accumulate(dims.begin(), dims.end(), index_t(0), f_acc);
} }
...@@ -75,19 +75,19 @@ int main(int argc, char* argv[]) ...@@ -75,19 +75,19 @@ int main(int argc, char* argv[])
using namespace ck; using namespace ck;
#if 0 #if 0
constexpr index_t N = 256; constexpr index_t N = 8;
constexpr index_t C = 64; constexpr index_t C = 8;
constexpr index_t HI = 17; constexpr index_t HI = 2;
constexpr index_t WI = 17; constexpr index_t WI = 8;
constexpr index_t K = 256; constexpr index_t K = 128;
constexpr index_t Y = 17; constexpr index_t Y = 1;
constexpr index_t X = 17; constexpr index_t X = 1;
using ConvStrides = Sequence<1, 1>; using ConvStrides = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>; using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<0, 3>; using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 3>; using RightPads = Sequence<0, 0>;
#elif 0 #elif 0
// 3x3, 34x34 // 3x3, 34x34
constexpr index_t N = 64; constexpr index_t N = 64;
...@@ -347,7 +347,7 @@ int main(int argc, char* argv[]) ...@@ -347,7 +347,7 @@ int main(int argc, char* argv[])
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread); wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
#elif 0 #elif 0
in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread); in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread); wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
#elif 0 #elif 0
in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread); in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread); wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment