Commit ca313a29 authored by letaoqin's avatar letaoqin
Browse files

Merge branch 'develop' into dl_conv_multiple_d

parents d47bf127 8784a72e
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <optional>
#include <string_view>
#include <utility>
/// Singleton table of profiler operations, keyed by name.
///
/// Names and descriptions are stored as std::string_view, i.e. the registry keeps views and
/// does not copy the characters. The viewed strings must therefore outlive every use of the
/// corresponding entry.
class ProfilerOperationRegistry final
{
    public:
    /// Callable stored for each entry: invoked with (int, char*[]) and returns an int.
    using Operation = std::function<int(int, char*[])>;

    /// Returns the one registry instance; it is a function-local static, created on first use.
    static ProfilerOperationRegistry& GetInstance()
    {
        static ProfilerOperationRegistry instance;
        return instance;
    }

    /// Looks up the operation registered under `name`.
    ///
    /// @return a copy of the stored callable, or std::nullopt when `name` is not registered.
    std::optional<Operation> Get(std::string_view name) const
    {
        if(const auto position = entries_.find(name); position != entries_.end())
        {
            return position->second.operation_;
        }

        return std::nullopt;
    }

    /// Registers `operation` under `name` together with a human-readable `description`.
    ///
    /// @return true when a new entry was inserted; false when `name` was already present
    ///         (the existing entry is left untouched).
    bool Add(std::string_view name, std::string_view description, Operation operation)
    {
        const auto insertion = entries_.try_emplace(name, description, std::move(operation));
        return insertion.second;
    }

    private:
    /// Payload kept for each registered name.
    struct Entry final
    {
        explicit Entry(std::string_view description, Operation operation) noexcept
            : description_(description), operation_(std::move(operation))
        {
        }

        std::string_view description_;
        Operation operation_;
    };

    // Construction and destruction are private: the only instance lives inside GetInstance().
    ProfilerOperationRegistry()  = default;
    ~ProfilerOperationRegistry() = default;

    // Ordered by name, so streaming the registry lists entries in sorted order.
    std::map<std::string_view, Entry> entries_;

    /// Writes the registry as "{", one "\t<name>: <description>" line per entry, then "}".
    friend std::ostream& operator<<(std::ostream& stream, const ProfilerOperationRegistry& registry)
    {
        stream << "{\n";

        for(const auto& item : registry.entries_)
        {
            stream << "\t" << item.first << ": " << item.second.description_ << "\n";
        }

        stream << "}";
        return stream;
    }
};
// Token pasting in two steps: PP_CONCAT expands its arguments first (so macros such as
// __COUNTER__ are replaced by their value), then PP_CONCAT_IMPL glues the results together.
#define PP_CONCAT_IMPL(lhs, rhs) lhs##rhs
#define PP_CONCAT(lhs, rhs) PP_CONCAT_IMPL(lhs, rhs)

// Defines a uniquely named file-local bool whose initializer adds the operation to
// ::ProfilerOperationRegistry. The bool holds the result of Add().
#define REGISTER_PROFILER_OPERATION(op_name, op_description, op_callable)      \
    static const bool PP_CONCAT(operation_registration_result_, __COUNTER__) = \
        ::ProfilerOperationRegistry::GetInstance().Add(op_name, op_description, op_callable)
include_directories(BEFORE include_directories(BEFORE
${PROJECT_SOURCE_DIR}/ ${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/profiler/include
) )
include(googletest) include(googletest)
...@@ -53,4 +54,4 @@ add_subdirectory(softmax) ...@@ -53,4 +54,4 @@ add_subdirectory(softmax)
add_subdirectory(normalization) add_subdirectory(normalization)
add_subdirectory(data_type) add_subdirectory(data_type)
add_subdirectory(elementwise_normalization) add_subdirectory(elementwise_normalization)
add_subdirectory(batchnorm_fwd) add_subdirectory(batchnorm)
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp" #include "profiler/profile_batched_gemm_impl.hpp"
namespace { namespace {
using ADataType = ck::bhalf_t; using ADataType = ck::bhalf_t;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp" #include "profiler/profile_batched_gemm_impl.hpp"
namespace { namespace {
using ADataType = ck::half_t; using ADataType = ck::half_t;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp" #include "profiler/profile_batched_gemm_impl.hpp"
namespace { namespace {
using ADataType = float; using ADataType = float;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
#include "profiler/include/profile_batched_gemm_impl.hpp" #include "profiler/profile_batched_gemm_impl.hpp"
namespace { namespace {
using ADataType = int8_t; using ADataType = int8_t;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include <vector> #include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_gemm_impl.hpp" #include "profiler/profile_batched_gemm_gemm_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization; using ck::tensor_operation::device::GemmSpecialization;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
#include "profiler/include/profile_batched_gemm_reduce_impl.hpp" #include "profiler/profile_batched_gemm_reduce_impl.hpp"
int main() int main()
{ {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include <vector> #include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp" #include "profiler/profile_batched_gemm_softmax_gemm_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization; using ck::tensor_operation::device::GemmSpecialization;
template <ck::index_t N> template <ck::index_t N>
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_softmax_gemm_permute_impl.hpp" #include "profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization; using ck::tensor_operation::device::GemmSpecialization;
using ck::tensor_operation::device::MaskingSpecialization; using ck::tensor_operation::device::MaskingSpecialization;
......
add_gtest_executable(test_batchnorm_fwd_rank_4 batchnorm_fwd_rank_4.cpp) add_gtest_executable(test_batchnorm_fwd_rank_4 batchnorm_fwd_rank_4.cpp)
add_gtest_executable(test_batchnorm_bwd_rank_4 batchnorm_bwd_rank_4.cpp)
target_link_libraries(test_batchnorm_fwd_rank_4 PRIVATE utility device_batchnorm_instance) target_link_libraries(test_batchnorm_fwd_rank_4 PRIVATE utility device_batchnorm_instance)
target_link_libraries(test_batchnorm_bwd_rank_4 PRIVATE utility device_batchnorm_instance)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <limits>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_batchnorm_backward_impl.hpp"
// Short aliases for the element types used in the KernelTypes tuples further down.
using F16 = ck::half_t;
using F32 = float;
using BF16 = ck::bhalf_t;
using F64 = double;
/// Typed fixture that runs the batchnorm-backward profiler over a fixed list of rank-4 shapes.
///
/// @tparam Tuple std::tuple of seven element types, read by index in this order:
///               X, Dx, Dy, Acc, Scale, Bias, MeanVar.
template <typename Tuple>
class TestBatchNormBwdRank4 : public ::testing::Test
{
    private:
    // Epsilon handed to the profiler: float machine epsilon, stored as double.
    const double epsilon = std::numeric_limits<float>::epsilon();

    protected:
    using XDataType       = std::tuple_element_t<0, Tuple>;
    using DxDataType      = std::tuple_element_t<1, Tuple>;
    using DyDataType      = std::tuple_element_t<2, Tuple>;
    using AccDataType     = std::tuple_element_t<3, Tuple>;
    using ScaleDataType   = std::tuple_element_t<4, Tuple>;
    using BiasDataType    = std::tuple_element_t<5, Tuple>;
    using MeanVarDataType = std::tuple_element_t<6, Tuple>;

    // Rank-4 tensor lengths every test case is run against.
    std::vector<std::vector<size_t>> list_of_lengths = {
        {128, 16, 3, 1024}, {128, 16, 6, 512}, {1, 1, 1, 1}, {4, 4, 4, 4}, {32, 32, 32, 32}};

    // Dimensions to reduce over; each test assigns this before calling Run().
    std::vector<int> reduceDims;

    /// Runs the profiler twice for every shape in list_of_lengths and expects both runs to pass.
    ///
    /// @tparam NumReduceDim number of reduced dimensions; must equal reduceDims.size().
    template <int NumReduceDim>
    void Run()
    {
        // Checked once, fatally: a mismatch is a test set-up error, so the profiler is never
        // invoked with a reduceDims that disagrees with the template argument.
        ASSERT_EQ(reduceDims.size(), static_cast<size_t>(NumReduceDim));

        for(auto& inOutLengths : list_of_lengths)
        {
            bool pass = true;

            // The two runs differ only in the seventh argument (true first, then false).
            // NOTE(review): presumably it selects whether saved mean/inv-variance are used;
            // confirm against profile_batchnorm_backward_impl.hpp.
            // `pass &&` short-circuits, so the second run is skipped once the first one fails.
            for(const bool seventhArg : {true, false})
            {
                pass = pass && ck::profiler::profile_batchnorm_backward_impl<XDataType,
                                                                             DxDataType,
                                                                             DyDataType,
                                                                             AccDataType,
                                                                             ScaleDataType,
                                                                             BiasDataType,
                                                                             MeanVarDataType,
                                                                             4,
                                                                             NumReduceDim>(
                                   true, 3, false, false, inOutLengths, reduceDims, seventhArg, epsilon);
            }

            EXPECT_TRUE(pass);
        }
    }
};
// Type combinations the suite is instantiated with. Each tuple lists, in the order the
// fixture reads them by index: X, Dx, Dy, Acc, Scale, Bias, MeanVar.
using KernelTypes = ::testing::Types<std::tuple<F16, F32, F32, F32, F16, F32, F32>,
std::tuple<F32, F32, F32, F32, F32, F32, F32>,
std::tuple<BF16, F32, F32, F32, BF16, F32, F32>,
std::tuple<F64, F64, F64, F64, F64, F64, F64>>;
TYPED_TEST_SUITE(TestBatchNormBwdRank4, KernelTypes);
// nhwc: reduce over dimensions 0, 1 and 2, leaving dimension 3
// (presumably N, H, W reduced and C kept -- confirm against the profiler's layout convention).
TYPED_TEST(TestBatchNormBwdRank4, nhwc)
{
this->reduceDims = {0, 1, 2};
this->template Run<3>();
}
// nchw: reduce over dimensions 0, 2 and 3, leaving dimension 1
// (presumably N, H, W reduced and C kept -- confirm against the profiler's layout convention).
TYPED_TEST(TestBatchNormBwdRank4, nchw)
{
this->reduceDims = {0, 2, 3};
this->template Run<3>();
}
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include <tuple> #include <tuple>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "profiler/include/profile_batchnorm_forward_impl.hpp" #include "profiler/profile_batchnorm_forward_impl.hpp"
using F16 = ck::half_t; using F16 = ck::half_t;
using F32 = float; using F32 = float;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include <tuple> #include <tuple>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "profiler/include/profile_conv_bwd_data_impl.hpp" #include "profiler/profile_conv_bwd_data_impl.hpp"
template <typename Tuple> template <typename Tuple>
class TestConvndBwdData : public ::testing::Test class TestConvndBwdData : public ::testing::Test
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include <tuple> #include <tuple>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "profiler/include/profile_conv_fwd_impl.hpp" #include "profiler/profile_conv_fwd_impl.hpp"
template <typename Tuple> template <typename Tuple>
class TestConvndFwd : public ::testing::Test class TestConvndFwd : public ::testing::Test
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "profiler/include/profile_elementwise_layernorm_impl.hpp" #include "profiler/profile_elementwise_layernorm_impl.hpp"
using F16 = ck::half_t; using F16 = ck::half_t;
using F32 = float; using F32 = float;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <iostream> #include <iostream>
#include "profiler/include/profile_gemm_reduce_impl.hpp" #include "profiler/profile_gemm_reduce_impl.hpp"
int main() int main()
{ {
......
...@@ -226,9 +226,8 @@ int main(int argc, char* argv[]) ...@@ -226,9 +226,8 @@ int main(int argc, char* argv[])
std::vector<gemmArgs> test_cases; std::vector<gemmArgs> test_cases;
if(argc == 1) if(argc == 1)
{ {
test_cases = {{GemmMatrixLayout::MK_KN_MN, 3, 3, 3, 3, 3, 3, 1}}; test_cases = {{GemmMatrixLayout::MK_KN_MN, 1024, 1024, 1024, 1024, 1024, 1024, 2},
// JD: Populate with more and meaningful {GemmMatrixLayout::MK_KN_MN, 1024, 1024, 1024, 1024, 1024, 1024, 8}};
return 0;
} }
else if(argc == 9) else if(argc == 9)
{ {
...@@ -253,11 +252,10 @@ int main(int argc, char* argv[]) ...@@ -253,11 +252,10 @@ int main(int argc, char* argv[])
printf("arg2 to 7: M, N, K, StrideA, StrideB, StrideC KBatch\n"); printf("arg2 to 7: M, N, K, StrideA, StrideB, StrideC KBatch\n");
return -1; return -1;
} }
bool error = false;
for(const auto& kinder : test_cases) for(const auto& kinder : test_cases)
{ {
const auto res = test_gemm(kinder); error |= test_gemm(kinder);
if(!res)
return -1;
} }
return 0; return error ? 1 : 0;
} }
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "profiler/include/profile_grouped_conv_bwd_weight_impl.hpp" #include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
template <typename Tuple> template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test class TestGroupedConvndBwdWeight : public ::testing::Test
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <vector> #include <vector>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "profiler/include/profile_grouped_conv_fwd_impl.hpp" #include "profiler/profile_grouped_conv_fwd_impl.hpp"
class TestGroupedConvNdFwd : public ::testing::Test class TestGroupedConvNdFwd : public ::testing::Test
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment