Unverified Commit 4eba345f authored by rocking5566, committed by GitHub

Group norm (#417)



* Add groupnorm example by layernorm
1.  Reference is not ready
2. Shape of gamma and beta needs to be fixed

* Allow the shape of gamma and beta to be the same as x

* Modify test, instance and client example

* [What] Fix a bug in layernorm for inputs with more than 2 dimensions.
[Why] We need to get the upper length from the merge transform instead of the embed transform.

* Add reference for groupnorm (see the host-side sketch after the commit metadata below)

* Fuse sigmoid after groupnorm

* [What] Rename the original layernorm to layernorm2d
[Why] Prepare to add groupnorm using layernorm5d

* clang-format

* Add groupnorm test

* Refine error message

* Add groupnorm ckProfiler

* Test groupnorm kernel from device_instance

* update example

* update profiler

* Fix test naming

* Fix argc number

* Move descriptor and sweeponce to argument for quick debugging
Co-authored-by: Chao Liu <chao.liu2@amd.com>
parent f584ab0c
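For context on what the new reference and tests compute: group normalization over an {N, H, W, G, C} tensor normalizes each (n, g) slice across H, W, and the C channels of that group, then applies an affine transform with gamma and beta. The sketch below is a minimal host-side reference written under that reading; the layout, loop order, per-channel (G x C) gamma/beta shape, and eps value are illustrative assumptions and are not taken from the device kernels or the ck::profiler reference added in this commit (the commit itself allows gamma and beta to share x's logical shape via broadcast strides).

// Minimal host-side group-norm reference sketch for an NHWGC tensor.
// Layout, loop order, gamma/beta shape and eps handling are assumptions for
// illustration; the commit's device kernels and reference may differ.
#include <cmath>
#include <cstddef>
#include <vector>

void groupnorm_nhwgc_reference(const std::vector<float>& x,
                               const std::vector<float>& gamma, // assumed size G * C
                               const std::vector<float>& beta,  // assumed size G * C
                               std::vector<float>& y,
                               int N, int H, int W, int G, int C,
                               float eps = 1e-4f)
{
    auto idx = [&](int n, int h, int w, int g, int c) -> std::size_t {
        return (((std::size_t(n) * H + h) * W + w) * G + g) * C + c;
    };

    for(int n = 0; n < N; ++n)
        for(int g = 0; g < G; ++g)
        {
            // Mean and variance are reduced over H, W and the channels of one group.
            double sum = 0, sq_sum = 0;
            const double count = double(H) * W * C;
            for(int h = 0; h < H; ++h)
                for(int w = 0; w < W; ++w)
                    for(int c = 0; c < C; ++c)
                    {
                        const double v = x[idx(n, h, w, g, c)];
                        sum += v;
                        sq_sum += v * v;
                    }
            const double mean    = sum / count;
            const double var     = sq_sum / count - mean * mean;
            const double inv_std = 1.0 / std::sqrt(var + eps);

            // Normalize, then apply the per-channel affine parameters.
            for(int h = 0; h < H; ++h)
                for(int w = 0; w < W; ++w)
                    for(int c = 0; c < C; ++c)
                    {
                        const std::size_t i = idx(n, h, w, g, c);
                        y[i] = static_cast<float>((x[i] - mean) * inv_std) * gamma[g * C + c] +
                               beta[g * C + c];
                    }
        }
}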
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_groupnorm_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;

template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
    protected:
    using XDataType     = std::tuple_element_t<0, Tuple>;
    using GammaDataType = std::tuple_element_t<1, Tuple>;
    using BetaDataType  = std::tuple_element_t<2, Tuple>;
    using AccDataType   = std::tuple_element_t<3, Tuple>;
    using YDataType     = std::tuple_element_t<4, Tuple>;

    void Run()
    {
        // N, H, W, G, C
        std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
                                                         {1, 2, 3, 4, 5},
                                                         {256, 9, 9, 9, 9},
                                                         {1, 64, 64, 32, 10},
                                                         {1, 32, 32, 32, 20},
                                                         {1, 16, 16, 32, 40}};

        for(auto length : lengths)
        {
            bool success =
                ck::profiler::profile_groupnorm_impl<XDataType,
                                                     GammaDataType,
                                                     BetaDataType,
                                                     AccDataType,
                                                     YDataType>(true, 2, false, false, length);
            EXPECT_TRUE(success);
        }
    }
};

using KernelTypes = ::testing::Types<
    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>>;

TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);
TYPED_TEST(TestGroupnorm, Test_FP32) { this->Run(); }
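Each entry in lengths above is read as {N, H, W, G, C} (per the comment in Run()). The standalone snippet below is a sketch, not part of profile_groupnorm_impl, and its names are assumptions; it only spells out the sizes that this interpretation implies for one of the test shapes: how many independent normalization instances there are, how many elements each one reduces over, and how many affine parameters a per-channel gamma/beta would hold.

// Standalone sketch: sizes implied by an {N, H, W, G, C} group-norm problem.
// Variable names are illustrative assumptions, not taken from the profiler.
#include <cstdio>

int main()
{
    const int N = 1, H = 64, W = 64, G = 32, C = 10; // one shape from the test list above

    const long long normalization_instances = 1LL * N * G;     // one mean/variance pair per (n, g)
    const long long elements_per_instance   = 1LL * H * W * C; // reduction size for each pair
    const long long affine_elements         = 1LL * G * C;     // assuming per-channel gamma/beta

    std::printf("normalization instances: %lld\n", normalization_instances);
    std::printf("elements per instance:   %lld\n", elements_per_instance);
    std::printf("gamma/beta elements:     %lld\n", affine_elements);
    return 0;
}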
@@ -2,28 +2,28 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_layernorm_util.hpp"
#include "test_layernorm2d_util.hpp"
template <ck::index_t N>
using I = ck::Number<N>;
template <typename Tuple>
class TestLayernormFP16 : public ck::TestLayernorm<Tuple>
class TestLayernorm2dFP16 : public ck::TestLayernorm2d<Tuple>
{
};
// clang-format off
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, , GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<8>, I<8>, I<8>, I<8>>
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim , GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize>
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>
>;
// clang-format on
TYPED_TEST_SUITE(TestLayernormFP16, KernelTypes);
TYPED_TEST(TestLayernormFP16, Test_FP16) { this->Run(); }
TYPED_TEST_SUITE(TestLayernorm2dFP16, KernelTypes);
TYPED_TEST(TestLayernorm2dFP16, Test_FP16) { this->Run(); }
@@ -2,28 +2,28 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_layernorm_util.hpp"
#include "test_layernorm2d_util.hpp"
template <ck::index_t N>
using I = ck::Number<N>;
template <typename Tuple>
class TestLayernormFP32 : public ck::TestLayernorm<Tuple>
class TestLayernorm2dFP32 : public ck::TestLayernorm2d<Tuple>
{
};
// clang-format off
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, , GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<4>, I<4>, I<4>, I<4>>
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize>
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>
>;
// clang-format on
TYPED_TEST_SUITE(TestLayernormFP32, KernelTypes);
TYPED_TEST(TestLayernormFP32, Test_FP32) { this->Run(); }
TYPED_TEST_SUITE(TestLayernorm2dFP32, KernelTypes);
TYPED_TEST(TestLayernorm2dFP32, Test_FP32) { this->Run(); }
@@ -31,7 +31,7 @@ std::string serialize_range(const Range& range)
}
template <typename Tuple>
class TestLayernorm : public ::testing::Test
class TestLayernorm2d : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
@@ -48,9 +48,11 @@ class TestLayernorm : public ::testing::Test
static constexpr index_t KThreadSliceSize = std::tuple_element_t<11, Tuple>{}.value;
static constexpr index_t XYSrcVectorDim = std::tuple_element_t<12, Tuple>{}.value;
static constexpr index_t XSrcVectorSize = std::tuple_element_t<13, Tuple>{}.value;
static constexpr index_t GammaSrcVectorSize = std::tuple_element_t<14, Tuple>{}.value;
static constexpr index_t BetaSrcVectorSize = std::tuple_element_t<15, Tuple>{}.value;
static constexpr index_t YDstVectorSize = std::tuple_element_t<16, Tuple>{}.value;
static constexpr index_t GammaSrcVectorDim = std::tuple_element_t<14, Tuple>{}.value;
static constexpr index_t GammaSrcVectorSize = std::tuple_element_t<15, Tuple>{}.value;
static constexpr index_t BetaSrcVectorDim = std::tuple_element_t<16, Tuple>{}.value;
static constexpr index_t BetaSrcVectorSize = std::tuple_element_t<17, Tuple>{}.value;
static constexpr index_t YDstVectorSize = std::tuple_element_t<18, Tuple>{}.value;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
@@ -78,23 +80,24 @@ class TestLayernorm : public ::testing::Test
KThreadSliceSize,
XYSrcVectorDim,
XSrcVectorSize,
GammaSrcVectorDim,
GammaSrcVectorSize,
BetaSrcVectorDim,
BetaSrcVectorSize,
YDstVectorSize>;
TestLayernorm() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}
TestLayernorm2d() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}
void RunSingle(std::vector<index_t> lengths, std::vector<index_t> reduceDims)
void RunSingle(const std::vector<index_t>& lengths,
const std::vector<index_t>& reduceDims,
const std::vector<index_t>& GammaLength,
const std::vector<index_t>& GammaStride,
const std::vector<index_t>& BetaLength,
const std::vector<index_t>& BetaStride)
{
std::vector<index_t> reduceLength(reduceDims.size());
for(int i = 0; i < NumReduceDim; ++i)
{
reduceLength[i] = lengths[reduceDims[i]];
}
Tensor<XDataType> x(lengths);
Tensor<GammaDataType> gamma(reduceLength);
Tensor<BetaDataType> beta(reduceLength);
Tensor<GammaDataType> gamma(GammaLength);
Tensor<BetaDataType> beta(BetaLength);
Tensor<YDataType> y(lengths);
Tensor<YDataType> y_ref(lengths);
@@ -115,10 +118,8 @@ class TestLayernorm : public ::testing::Test
auto argument_ptr = device_instance.MakeArgumentPointer(
lengths,
std::vector<ck::index_t>{x.mDesc.GetStrides().begin(), x.mDesc.GetStrides().end()},
std::vector<ck::index_t>{gamma.mDesc.GetStrides().begin(),
gamma.mDesc.GetStrides().end()},
std::vector<ck::index_t>{beta.mDesc.GetStrides().begin(),
beta.mDesc.GetStrides().end()},
GammaStride,
BetaStride,
std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
reduceDims,
1e-4,
@@ -163,17 +164,16 @@ class TestLayernorm : public ::testing::Test
void Run()
{
for(auto length : this->lengths_)
std::vector<std::vector<index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
{
this->RunSingle(length, reduceDims_[0]);
this->RunSingle(length, {1}, {length[1]}, {0, 1}, {length[1]}, {0, 1});
}
}
std::vector<std::vector<index_t>> lengths_ = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
std::vector<std::vector<index_t>> reduceDims_ = {{1}};
typename ReferenceInstance::Invoker ref_instance_invoker_;
};
} // namespace ck
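The reworked Run() above now calls RunSingle with gamma/beta lengths of {length[1]} and strides of {0, 1}: the 1D affine parameters are described as 2D tensors whose M stride is zero, so every row of x reads the same gamma and beta values. The snippet below is a small standalone sketch of how a zero stride realizes that broadcast in a plain strided offset computation; the helper name and formula are illustrative assumptions, not the device kernel's addressing code.

// Sketch: a zero M stride makes every row of a logical M x K view hit the same
// physical gamma/beta element, which is how the {0, 1} strides above broadcast
// the 1D parameters across M. Names here are illustrative assumptions.
#include <cstddef>
#include <vector>

inline std::size_t offset_2d(std::size_t m, std::size_t k, std::size_t stride_m, std::size_t stride_k)
{
    return m * stride_m + k * stride_k; // stride_m == 0 collapses the M dimension
}

int main()
{
    const std::size_t K = 256;
    std::vector<float> gamma(K, 1.0f); // physically 1D, viewed as M x K via strides {0, 1}

    const float a = gamma[offset_2d(/*m=*/0, /*k=*/7, /*stride_m=*/0, /*stride_k=*/1)];
    const float b = gamma[offset_2d(/*m=*/3, /*k=*/7, /*stride_m=*/0, /*stride_k=*/1)];
    return (a == b) ? 0 : 1; // same element for every m, i.e. broadcast over rows
}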