Commit 1dbdab56 authored by Jing Zhang
Browse files

merge develop

parents d2e49b23 bac7df8f
add_example_executable(example_batchnorm_forward batchnorm_forward_nhwc.cpp)
add_example_executable(example_batchnorm_infer batchnorm_infer_nhwc.cpp)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -30,7 +30,7 @@ add_subdirectory(12_reduce) ...@@ -30,7 +30,7 @@ add_subdirectory(12_reduce)
add_subdirectory(13_pool2d_fwd) add_subdirectory(13_pool2d_fwd)
add_subdirectory(14_gemm_xdl_requant_relu_requant) add_subdirectory(14_gemm_xdl_requant_relu_requant)
add_subdirectory(15_grouped_gemm) add_subdirectory(15_grouped_gemm)
add_subdirectory(16_gemm_reduce) add_subdirectory(16_gemm_multi_d_multi_reduces)
add_subdirectory(17_convnd_bwd_data) add_subdirectory(17_convnd_bwd_data)
add_subdirectory(18_batched_gemm_reduce) add_subdirectory(18_batched_gemm_reduce)
add_subdirectory(19_binary_elementwise) add_subdirectory(19_binary_elementwise)
...@@ -42,6 +42,11 @@ add_subdirectory(24_batched_gemm) ...@@ -42,6 +42,11 @@ add_subdirectory(24_batched_gemm)
add_subdirectory(25_gemm_bias_e_permute) add_subdirectory(25_gemm_bias_e_permute)
add_subdirectory(26_contraction) add_subdirectory(26_contraction)
add_subdirectory(27_layernorm) add_subdirectory(27_layernorm)
add_subdirectory(28_grouped_gemm_bias) add_subdirectory(28_grouped_gemm_bias_e_permute)
add_subdirectory(30_grouped_convnd_fwd_bias_relu) add_subdirectory(29_batched_gemm_bias_e_permute)
add_subdirectory(31_splitK_gemm) add_subdirectory(30_grouped_convnd_fwd_bias_relu_add)
add_subdirectory(31_batched_gemm_gemm)
add_subdirectory(32_batched_gemm_scale_softmax_gemm)
add_subdirectory(33_multiple_reduce)
add_subdirectory(34_batchnorm)
add_subdirectory(35_splitK_gemm)
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#pragma once #pragma once
#include "ck/utility/common_header.hpp" #include "ck/utility/common_header.hpp"
#include "ck/utility/sequence_helper.hpp"
#include "ck/tensor_description/multi_index_transform.hpp" #include "ck/tensor_description/multi_index_transform.hpp"
namespace ck { namespace ck {
...@@ -159,6 +160,12 @@ struct TensorDescriptor ...@@ -159,6 +160,12 @@ struct TensorDescriptor
return transforms_[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}]; return transforms_[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}];
} }
// Returns the result of generate_sequence_v2 applied to a callable that yields
// GetLength(i), together with Number<ndim_visible_>{}.
// NOTE(review): presumably i runs over the visible dimensions in order and the
// result is a compile-time sequence of their lengths; confirm against
// ck/utility/sequence_helper.hpp.
__host__ __device__ constexpr auto GetLengths() const
{
    // FIXME: use Tuple of reference instead
    const auto length_of_dim = [&](auto idim) { return GetLength(idim); };

    return generate_sequence_v2(length_of_dim, Number<ndim_visible_>{});
}
__host__ __device__ constexpr auto GetElementSize() const { return element_size_; } __host__ __device__ constexpr auto GetElementSize() const { return element_size_; }
__host__ __device__ constexpr auto GetElementSpaceSize() const { return element_space_size_; } __host__ __device__ constexpr auto GetElementSpaceSize() const { return element_space_size_; }
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment