"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "ae3b0a676647cb75fdca5c74987ab6c998384c17"
Commit 126cae0c authored by Chao Liu's avatar Chao Liu
Browse files

bug fix

parent e1a67b69
......@@ -185,6 +185,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
InBlockCopyDstDataPerWrite_N2>(
{0, 0, b_block_data_on_global, 0}, {0, 0, 0, 0});
#if 0
// weight tensor
// tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc =
......@@ -192,6 +193,13 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}));
#else // hack
constexpr auto wei_e_k_global_desc_old =
WeiGlobalDesc::Unfold(I1, I3).ReorderGivenNew2Old(Sequence<1, 0>{});
constexpr auto wei_e_k_global_desc = make_native_tensor_descriptor(
wei_e_k_global_desc_old.GetLengths(), wei_e_k_global_desc_old.GetStrides());
#endif
// tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment
......
......@@ -184,23 +184,27 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
InBlockCopyDstDataPerWrite_N2>(
{0, 0, b_block_data_on_global, 0}, {0, 0, 0, 0});
#if 0
// weight tensor
// tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc =
#if 0
transform_tensor_descriptor(wei_k_c_y_x_global_desc,
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}));
#else // hack
make_native_tensor_descriptor_packed(Sequence<K, C * Y * X>{});
constexpr auto wei_e_k_global_desc_old =
WeiGlobalDesc::Unfold(I1, I3).ReorderGivenNew2Old(Sequence<1, 0>{});
constexpr auto wei_e_k_global_desc = make_native_tensor_descriptor(
wei_e_k_global_desc_old.GetLengths(), wei_e_k_global_desc_old.GetStrides());
#endif
// tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment
constexpr auto wei_e_k_block_desc = make_native_tensor_descriptor_aligned(
Sequence<EPerBlock, KPerBlock>{},
Number<math::lcm(WeiBlockCopyDstDataPerWrite_K, GemmDataPerReadA)>{});
// tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment
constexpr auto wei_e_k_block_desc = make_native_tensor_descriptor_aligned(
Sequence<EPerBlock, KPerBlock>{},
Number<math::lcm(WeiBlockCopyDstDataPerWrite_K, GemmDataPerReadA)>{});
// operator for blockwise copy of weight into LDS
// slice a tensor, and copy it into another tensor
......
......@@ -313,14 +313,14 @@ struct TensorCoordinate
private:
template <class... Ts>
__host__ __device__ static constexpr auto
MakeDummyTensorCoordinate(ConstantTensorDescriptor<Ts...>)
MakeDummyTensorCoordinate(ConstantTensorDescriptor<Ts...>)
{
return NormalTensorCoordinate<ConstantTensorDescriptor<Ts...>>();
}
template <class... Ts>
__host__ __device__ static constexpr auto
MakeDummyTensorCoordinate(ConstantMergedTensorDescriptor<Ts...>)
MakeDummyTensorCoordinate(ConstantMergedTensorDescriptor<Ts...>)
{
return MergedTensorCoordinate<ConstantMergedTensorDescriptor<Ts...>>();
}
......
......@@ -188,7 +188,7 @@ struct TensorCoordinate_v2
private:
template <typename... Ts>
__host__ __device__ static constexpr auto
MakeDummyTensorCoordinate(NativeTensorDescriptor<Ts...>)
MakeDummyTensorCoordinate(NativeTensorDescriptor<Ts...>)
{
return NativeTensorCoordinate<NativeTensorDescriptor<Ts...>>(
make_zero_array<index_t, TensorDesc::GetNumOfDimension()>());
......@@ -196,7 +196,7 @@ struct TensorCoordinate_v2
template <typename... Ts>
__host__ __device__ static constexpr auto
MakeDummyTensorCoordinate(TransformedTensorDescriptor<Ts...>)
MakeDummyTensorCoordinate(TransformedTensorDescriptor<Ts...>)
{
return TransformedTensorCoordinate<TransformedTensorDescriptor<Ts...>>(
make_zero_array<index_t, TensorDesc::GetNumOfDimension()>());
......
......@@ -85,12 +85,6 @@ struct NativeTensorDescriptor
return offset;
}
// TODO: remove this
__host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(const Index& idx)
{
return CalculateOffset(idx);
}
__host__ __device__ static constexpr index_t CalculateOffsetDiff(const Index& idx_diff)
{
index_t offset_diff = 0;
......@@ -227,13 +221,6 @@ struct TransformedTensorDescriptor
return LowTensorDescriptor{};
}
#if 0
__host__ __device__ static constexpr auto GetLowerLengths()
{
return GetLowerTensorDescriptor().GetLengths();
}
#endif
struct lambda_GetUpperLengths
{
template <typename Transform>
......@@ -359,12 +346,6 @@ struct TransformedTensorDescriptor
return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
}
// TODO: remove this
__host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(const UpperIndex& idx_up)
{
return CalculateOffset(idx_up);
}
#if 0
template <index_t IDim>
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
......
......@@ -49,7 +49,7 @@ struct GeneratorTensor_3
{
std::array<index_t, sizeof...(Is)> dims = {{static_cast<index_t>(is)...}};
auto f_acc = [](auto a, auto b) { return 100 * a + b; };
auto f_acc = [](auto a, auto b) { return 10 * a + b; };
return std::accumulate(dims.begin(), dims.end(), index_t(0), f_acc);
}
......@@ -75,19 +75,19 @@ int main(int argc, char* argv[])
using namespace ck;
#if 0
constexpr index_t N = 256;
constexpr index_t C = 64;
constexpr index_t HI = 17;
constexpr index_t WI = 17;
constexpr index_t K = 256;
constexpr index_t Y = 17;
constexpr index_t X = 17;
constexpr index_t N = 8;
constexpr index_t C = 8;
constexpr index_t HI = 2;
constexpr index_t WI = 8;
constexpr index_t K = 128;
constexpr index_t Y = 1;
constexpr index_t X = 1;
using ConvStrides = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<0, 3>;
using RightPads = Sequence<0, 3>;
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
#elif 0
// 3x3, 34x34
constexpr index_t N = 64;
......@@ -347,7 +347,7 @@ int main(int argc, char* argv[])
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
#elif 0
in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
#elif 0
in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment