Merge branch 'develop' into conv_quant_int8

48c85879 · rocking5566 · GitHub · aa71a478 · 1b62bfaa · 48c85879
Unverified commit 48c85879, authored Oct 13, 2022 by rocking5566; committed by GitHub on Oct 13, 2022
9 changed files
--- a/profiler/src/profile_layernorm.cpp
+++ b/profiler/src/profile_layernorm.cpp
@@ -12,8 +12,7 @@ using ck::index_t;

 struct LayernormArgParser
 {
-    std::unordered_map<std::string, std::vector<int>> long_opts = {
-        {"length", {}}, {"strideXY", {}}, {"strideGamma", {}}, {"strideBeta", {}}};
+    std::unordered_map<std::string, std::vector<int>> long_opts = {{"length", {}}};

    bool parse_opt(int argc, char* argv[], const std::string& key, int i)
    {
@@ -52,9 +51,6 @@ void print_help_layernorm()
              << "arg4: print tensor value (0: no; 1: yes)\n"
              << "arg5: time kernel (0=no, 1=yes)\n"
              << "--length: tensor extents (e.g, --length 1024 1024) \n"
-              << "--strideXY: tensor strides (e.g, --strideXY 1024 1)\n"
-              << "--strideGamma: tensor strides (e.g, --strideGamma 1)\n"
-              << "--strideBeta: tensor strides (e.g, --strideBeta 1)\n"
              << std::endl;
 }

@@ -77,10 +73,7 @@ int profile_layernorm(int argc, char* argv[])

    // parse the long options
    arg_parser(argc, argv);
-    const std::vector<index_t> length      = arg_parser.long_opts["length"];
-    const std::vector<index_t> strideXY    = arg_parser.long_opts["strideXY"];
-    const std::vector<index_t> strideGamma = arg_parser.long_opts["strideGamma"];
-    const std::vector<index_t> strideBeta  = arg_parser.long_opts["strideBeta"];
+    const std::vector<index_t> length = arg_parser.long_opts["length"];

    using F16          = ck::half_t;
    using F32          = float;
@@ -88,25 +81,13 @@ int profile_layernorm(int argc, char* argv[])

    if(data_type == ck::DataTypeEnum::Half)
    {
-        ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, rank>(do_verification,
-                                                                            init_method,
-                                                                            do_log,
-                                                                            time_kernel,
-                                                                            length,
-                                                                            strideXY,
-                                                                            strideGamma,
-                                                                            strideBeta);
+        ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, rank>(
+            do_verification, init_method, do_log, time_kernel, length);
    }
    else if(data_type == ck::DataTypeEnum::Float)
    {
-        ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, rank>(do_verification,
-                                                                            init_method,
-                                                                            do_log,
-                                                                            time_kernel,
-                                                                            length,
-                                                                            strideXY,
-                                                                            strideGamma,
-                                                                            strideBeta);
+        ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, rank>(
+            do_verification, init_method, do_log, time_kernel, length);
    }
    else
    {

--- a/profiler/src/profile_normalization.cpp
+++ b/profiler/src/profile_normalization.cpp
@@ -5,7 +5,7 @@
 #include <vector>
 #include <unordered_map>

-#include "profiler/include/profile_normalization_impl.hpp"
+#include "profiler/include/profile_softmax_impl.hpp"

 using ck::index_t;
 using ck::profiler::NormDataType;
@@ -95,30 +95,29 @@ int profile_normalization(int argc, char* argv[])
    {
        if(data_type == NormDataType::F16_F16)
        {
-            ck::profiler::profile_normalization_impl<ck::half_t, float, ck::half_t, 3>(
-                do_verification,
-                init_method,
-                do_log,
-                time_kernel,
-                length,
-                stride,
-                reduce,
-                float(alpha),
-                float(beta),
-                norm_type);
+            ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 3>(do_verification,
+                                                                                 init_method,
+                                                                                 do_log,
+                                                                                 time_kernel,
+                                                                                 length,
+                                                                                 stride,
+                                                                                 reduce,
+                                                                                 float(alpha),
+                                                                                 float(beta),
+                                                                                 norm_type);
        }
        else if(data_type == NormDataType::F32_F32)
        {
-            ck::profiler::profile_normalization_impl<float, float, float, 3>(do_verification,
-                                                                             init_method,
-                                                                             do_log,
-                                                                             time_kernel,
-                                                                             length,
-                                                                             stride,
-                                                                             reduce,
-                                                                             float(alpha),
-                                                                             float(beta),
-                                                                             norm_type);
+            ck::profiler::profile_softmax_impl<float, float, float, 3>(do_verification,
+                                                                       init_method,
+                                                                       do_log,
+                                                                       time_kernel,
+                                                                       length,
+                                                                       stride,
+                                                                       reduce,
+                                                                       float(alpha),
+                                                                       float(beta),
+                                                                       norm_type);
        }
        else
        {
@@ -129,30 +128,29 @@ int profile_normalization(int argc, char* argv[])
    {
        if(data_type == NormDataType::F16_F16)
        {
-            ck::profiler::profile_normalization_impl<ck::half_t, float, ck::half_t, 4>(
-                do_verification,
-                init_method,
-                do_log,
-                time_kernel,
-                length,
-                stride,
-                reduce,
-                float(alpha),
-                float(beta),
-                norm_type);
+            ck::profiler::profile_softmax_impl<ck::half_t, float, ck::half_t, 4>(do_verification,
+                                                                                 init_method,
+                                                                                 do_log,
+                                                                                 time_kernel,
+                                                                                 length,
+                                                                                 stride,
+                                                                                 reduce,
+                                                                                 float(alpha),
+                                                                                 float(beta),
+                                                                                 norm_type);
        }
        else if(data_type == NormDataType::F32_F32)
        {
-            ck::profiler::profile_normalization_impl<float, float, float, 4>(do_verification,
-                                                                             init_method,
-                                                                             do_log,
-                                                                             time_kernel,
-                                                                             length,
-                                                                             stride,
-                                                                             reduce,
-                                                                             float(alpha),
-                                                                             float(beta),
-                                                                             norm_type);
+            ck::profiler::profile_softmax_impl<float, float, float, 4>(do_verification,
+                                                                       init_method,
+                                                                       do_log,
+                                                                       time_kernel,
+                                                                       length,
+                                                                       stride,
+                                                                       reduce,
+                                                                       float(alpha),
+                                                                       float(beta),
+                                                                       norm_type);
        }
        else
        {

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -6,11 +6,10 @@ include(googletest)

 add_custom_target(tests)

-
 function(add_test_executable TEST_NAME)
    message("adding test ${TEST_NAME}")
    add_executable(${TEST_NAME} ${ARGN})
-    add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> )
+    add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
    rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
@@ -23,6 +22,7 @@ function(add_gtest_executable TEST_NAME)
    add_executable(${TEST_NAME} ${ARGN})
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
+
    # suppress gtest warnings
    target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
    target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
@@ -30,7 +30,6 @@ function(add_gtest_executable TEST_NAME)
    rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
 endfunction(add_gtest_executable TEST_NAME)

-
 add_subdirectory(magic_number_division)
 add_subdirectory(space_filling_curve)
 add_subdirectory(conv_util)
@@ -51,5 +50,5 @@ add_subdirectory(convnd_bwd_data)
 add_subdirectory(grouped_convnd_fwd)
 add_subdirectory(block_to_ctile_map)
 add_subdirectory(softmax)
-add_subdirectory(layernorm)
+add_subdirectory(normalization)
 add_subdirectory(data_type)
--- a/test/layernorm/CMakeLists.txt
+++ b/test/layernorm/CMakeLists.txt
--- a/test/layernorm/test_groupnorm_fp16.cpp
+++ b/test/layernorm/test_groupnorm_fp16.cpp
--- a/test/layernorm/test_groupnorm_fp32.cpp
+++ b/test/layernorm/test_groupnorm_fp32.cpp
--- a/test/layernorm/test_layernorm2d_fp16.cpp
+++ b/test/layernorm/test_layernorm2d_fp16.cpp
--- a/test/layernorm/test_layernorm2d_fp32.cpp
+++ b/test/layernorm/test_layernorm2d_fp32.cpp
--- a/test/layernorm/test_layernorm2d_util.hpp
+++ b/test/layernorm/test_layernorm2d_util.hpp
@@ -9,7 +9,7 @@

 #include "ck/ck.hpp"
 #include "ck/utility/number.hpp"
-#include "ck/tensor_operation/gpu/device/device_layernorm_impl.hpp"
+#include "ck/tensor_operation/gpu/device/device_normalization_impl.hpp"

 #include "ck/library/utility/check_err.hpp"
 #include "ck/library/utility/host_tensor.hpp"
@@ -65,26 +65,26 @@ class TestLayernorm2d : public ::testing::Test
                                                                         Rank,
                                                                         NumReduceDim>;

-    using DeviceInstance = tensor_operation::device::DeviceLayernormImpl<XDataType,
-                                                                         GammaDataType,
-                                                                         BetaDataType,
-                                                                         AccDataType,
-                                                                         YDataType,
-                                                                         PassThrough,
-                                                                         Rank,
-                                                                         NumReduceDim,
-                                                                         BlockSize,
-                                                                         MThreadClusterSize,
-                                                                         KThreadClusterSize,
-                                                                         MThreadSliceSize,
-                                                                         KThreadSliceSize,
-                                                                         XYSrcVectorDim,
-                                                                         XSrcVectorSize,
-                                                                         GammaSrcVectorDim,
-                                                                         GammaSrcVectorSize,
-                                                                         BetaSrcVectorDim,
-                                                                         BetaSrcVectorSize,
-                                                                         YDstVectorSize>;
+    using DeviceInstance = tensor_operation::device::DeviceNormalizationImpl<XDataType,
+                                                                             GammaDataType,
+                                                                             BetaDataType,
+                                                                             AccDataType,
+                                                                             YDataType,
+                                                                             PassThrough,
+                                                                             Rank,
+                                                                             NumReduceDim,
+                                                                             BlockSize,
+                                                                             MThreadClusterSize,
+                                                                             KThreadClusterSize,
+                                                                             MThreadSliceSize,
+                                                                             KThreadSliceSize,
+                                                                             XYSrcVectorDim,
+                                                                             XSrcVectorSize,
+                                                                             GammaSrcVectorDim,
+                                                                             GammaSrcVectorSize,
+                                                                             BetaSrcVectorDim,
+                                                                             BetaSrcVectorSize,
+                                                                             YDstVectorSize>;

    TestLayernorm2d() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}