Commit 74603261 authored by Chao Liu

fix initialization issue

parent 360184cd
...
@@ -197,8 +197,8 @@ int run_conv_bwd_data(bool do_verification,
         wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
         break;
     default:
-        out.GenerateTensorValue(GeneratorTensor_1<OutDataType>{1});
-        wei.GenerateTensorValue(GeneratorTensor_1<WeiDataType>{1});
+        out.GenerateTensorValue(GeneratorTensor_3<OutDataType>{0.0, 1.0});
+        wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
     }
 
     DeviceMem in_device_buf(sizeof(InDataType) * in_device.mDesc.GetElementSpace());
...
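Here and in the profiler hunks below, GeneratorTensor_1 (constant fill) is replaced by GeneratorTensor_3 (uniform random floats in a range). For readers outside the repo, a minimal sketch of what these host-side generator functors look like, modeled on CK's host tensor generators (member names and rounding details are assumptions, not the library's exact code):

```cpp
#include <cstdlib>

// Constant fill: every element gets `value`, regardless of its coordinates.
template <typename T>
struct GeneratorTensor_1
{
    int value = 1;

    template <typename... Is>
    T operator()(Is...) const
    {
        return static_cast<T>(value);
    }
};

// Uniform random fill: every element gets a float drawn from [min_value, max_value].
template <typename T>
struct GeneratorTensor_3
{
    float min_value = 0.f;
    float max_value = 1.f;

    template <typename... Is>
    T operator()(Is...) const
    {
        const float u = static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX);
        return static_cast<T>(min_value + u * (max_value - min_value));
    }
};
```

GenerateTensorValue invokes the functor once per element (the ignored Is... pack carries the element's coordinates), so swapping the functor type is all it takes to change the initialization scheme.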
...
@@ -6,8 +6,8 @@
 #include "ck/tensor_operation/gpu/device/device_convnd_bwd_weight_nwc_kxc_nwk_xdl_cshuffle.hpp"
 
 using InDataType  = ck::bhalf_t;
-using WeiDataType =
-    float; // bf16 kernel use fp32 atomic add to accumulate Weight tensor into global memory
+// bf16 kernel use fp32 atomic add to accumulate Weight tensor into global memory
+using WeiDataType = float;
 using OutDataType = ck::bhalf_t;
 using AccDataType = float;
...
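The relocated comment states the constraint behind this commit: the bf16 backward-weight kernel accumulates partial weight gradients from multiple work-groups with atomic adds, and the atomic add is available for fp32 but not for bf16, so the weight tensor must be float. The precision half of that argument is easy to see on the CPU. The sketch below fakes bf16 by truncating a float's mantissa (real bf16 conversion rounds to nearest; truncation is enough to show the effect):

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// Keep only the top 16 bits of the float, i.e. truncate to bf16 precision.
float to_bf16_and_back(float x)
{
    std::uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits &= 0xFFFF0000u;
    std::memcpy(&x, &bits, sizeof(x));
    return x;
}

int main()
{
    float acc_fp32 = 0.f;
    float acc_bf16 = 0.f;

    for(int i = 0; i < 100000; ++i)
    {
        acc_fp32 += 1e-3f;                             // accumulate in fp32
        acc_bf16 = to_bf16_and_back(acc_bf16 + 1e-3f); // re-round every step
    }

    // fp32 reaches ~100; the bf16-width accumulator stalls near 0.5, because
    // past that point one bf16 ulp is larger than the 1e-3 increment.
    std::cout << acc_fp32 << " vs " << acc_bf16 << '\n';
}
```

Accumulating in fp32 and converting once at the end avoids both problems, which is why WeiDataType is float while the input and output tensors stay bf16.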
...
@@ -154,8 +154,8 @@ bool profile_conv_bwd_data_impl(int do_verification,
         weight.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
         break;
     default:
-        output.GenerateTensorValue(GeneratorTensor_1<OutDataType>{1});
-        weight.GenerateTensorValue(GeneratorTensor_1<WeiDataType>{1});
+        output.GenerateTensorValue(GeneratorTensor_3<OutDataType>{0.0, 1.0});
+        weight.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
     }
 
     DeviceMem in_device_buf(sizeof(InDataType) * input_device_result.mDesc.GetElementSpace());
...
...
@@ -156,12 +156,12 @@ bool profile_conv_bwd_weight_impl(int do_verification,
     {
     case 0: break;
     case 1:
-        input.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-2, 2});
-        output.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-2, 2});
+        input.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
+        output.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
        break;
     default:
-        input.GenerateTensorValue(GeneratorTensor_1<OutDataType>{1});
-        output.GenerateTensorValue(GeneratorTensor_1<WeiDataType>{1});
+        input.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
+        output.GenerateTensorValue(GeneratorTensor_3<OutDataType>{-0.5, 0.5});
     }
 
     DeviceMem in_device_buf(sizeof(InDataType) * input.mDesc.GetElementSpace());
...
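Two things change in this hunk besides the constant-to-random switch: the generator template arguments now match the tensor they initialize (input was previously filled through GeneratorTensor_2<OutDataType>, output through GeneratorTensor_2<WeiDataType>), and the ranges line up with the other profilers. Random data also makes verification meaningful, since with a constant tensor many indexing and layout bugs produce bit-identical results. A toy illustration of that weakness:

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
    // Constant init: a simulated indexing bug (reversed traversal) is invisible.
    std::vector<float> a{1, 1, 1, 1};
    std::vector<float> b = a;
    std::reverse(b.begin(), b.end());
    std::cout << (a == b) << '\n'; // prints 1: bug not detected

    // Random-ish init: the same bug changes the result.
    std::vector<float> c{0.3f, -0.1f, 0.7f, 0.2f};
    std::vector<float> d = c;
    std::reverse(d.begin(), d.end());
    std::cout << (c == d) << '\n'; // prints 0: bug detected
}
```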
...
@@ -197,7 +197,8 @@ int profile_conv_bwd_weight(int argc, char* argv[])
         }
         else if(data_type == ConvDataType::BF16_F32_BF16)
         {
-            return profile(I1, NWC{}, KXC{}, NWK{}, BF16{}, BF16{}, BF16{});
+            // fp32 atomic add is used for weight tensor in bf16 kernel
+            return profile(I1, NWC{}, KXC{}, NWK{}, BF16{}, F32{}, BF16{});
         }
     }
     else if(num_dim_spatial == 2 && layout == ConvLayout::NHWC_KYXC_NHWK)
...
@@ -212,7 +213,8 @@ int profile_conv_bwd_weight(int argc, char* argv[])
         }
         else if(data_type == ConvDataType::BF16_F32_BF16)
         {
-            return profile(I2, NHWC{}, KYXC{}, NHWK{}, BF16{}, BF16{}, BF16{});
+            // fp32 atomic add is used for weight tensor in bf16 kernel
+            return profile(I2, NHWC{}, KYXC{}, NHWK{}, BF16{}, F32{}, BF16{});
         }
     }
     else if(num_dim_spatial == 3 && layout == ConvLayout::NHWC_KYXC_NHWK)
...
@@ -227,7 +229,8 @@ int profile_conv_bwd_weight(int argc, char* argv[])
         }
         else if(data_type == ConvDataType::BF16_F32_BF16)
         {
-            return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, BF16{}, BF16{}, BF16{});
+            // fp32 atomic add is used for weight tensor in bf16 kernel
+            return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, BF16{}, F32{}, BF16{});
         }
     }
...
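All three spatial ranks of the BF16_F32_BF16 path now pass F32{} as the weight tag, matching the enum's name. The profile(...) calls use value-level tag dispatch: empty structs carry layouts and precisions as types, and an integral constant carries the rank. A compilable miniature of the idiom (names are illustrative, not CK's real definitions, where the precision tags map onto types such as ck::bhalf_t inside a generic helper):

```cpp
#include <type_traits>

// Stand-ins for CK's layout and precision tags.
struct NWC {}; struct KXC {}; struct NWK {};
struct BF16 {}; struct F32 {};

template <int NDim, typename InLay, typename WeiLay, typename OutLay,
          typename InT, typename WeiT, typename OutT>
int run_profile()
{
    // ... enumerate and run the device instances for this configuration ...
    return 0;
}

// The tag values select the template parameters, so call sites read like a table.
template <int NDim, typename... Tags>
int profile(std::integral_constant<int, NDim>, Tags...)
{
    return run_profile<NDim, Tags...>();
}

int main()
{
    constexpr auto I1 = std::integral_constant<int, 1>{};
    // Mirrors the 1d call above: bf16 in/out, fp32 weight.
    return profile(I1, NWC{}, KXC{}, NWK{}, BF16{}, F32{}, BF16{});
}
```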
...
@@ -196,6 +196,6 @@ int main()
     else
     {
         std::cout << "test convnd bwd: Fail " << std::endl;
-        return -1;
+        return 1;
     }
 }
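The return-value tweak is deliberate: on POSIX the exit status is truncated to eight bits, so return -1 surfaces as 255. Any nonzero status fails the test under CTest, but 0/1 is the portable convention. The shape the test's main ends up with, in miniature:

```cpp
#include <cstdlib>

int main()
{
    bool pass = false; // imagine the checks above set this

    // EXIT_SUCCESS is 0; EXIT_FAILURE is conventionally 1.
    return pass ? EXIT_SUCCESS : EXIT_FAILURE;
}
```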
...
 add_test_executable(test_convnd_bwd_weight convnd_bwd_weight.cpp)
-target_link_libraries(test_convnd_bwd_weight PRIVATE utility device_convnd_bwd_weight_instance)
+target_link_libraries(test_convnd_bwd_weight PRIVATE utility device_conv1d_bwd_weight_instance device_conv2d_bwd_weight_instance device_conv3d_bwd_weight_instance)
...
@@ -7,91 +7,62 @@
 #include <cstdlib>
 #include <vector>
 
-#include "test/convnd_fwd/conv_util.hpp"
-#include "profiler/include/profile_convnd_bwd_weight_impl.hpp"
+#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
 
-int test_self()
+int main()
 {
     bool pass = true;
 
-    std::vector<ck::tensor_operation::device::ConvParams> params;
+    std::vector<ck::utils::conv::ConvParams> params;
 
+    // check 1d
     params.push_back({1, 128, 256, 256, {1}, {7}, {2}, {1}, {0}, {0}});
     params.push_back({1, 128, 256, 256, {3}, {14}, {1}, {1}, {1}, {1}});
     params.push_back({1, 128, 256, 256, {1}, {3}, {1}, {1}, {0}, {0}});
 
     for(auto& param : params)
     {
-        // f32
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
-                                                             float,
-                                                             float,
-                                                             float,
-                                                             ck::tensor_layout::convolution::NWC,
-                                                             ck::tensor_layout::convolution::KXC,
-                                                             ck::tensor_layout::convolution::NWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
+        // fp32
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<1,
+                                                           ck::tensor_layout::convolution::NWC,
+                                                           ck::tensor_layout::convolution::KXC,
+                                                           ck::tensor_layout::convolution::NWK,
+                                                           float,
+                                                           float,
+                                                           float>(true,  // do_verification
+                                                                  1,     // init_method
+                                                                  false, // do_log
+                                                                  false, // time_kernel
+                                                                  param,
+                                                                  2);
 
         // fp16
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
-                                                             ck::half_t,
-                                                             ck::half_t,
-                                                             ck::half_t,
-                                                             ck::tensor_layout::convolution::NWC,
-                                                             ck::tensor_layout::convolution::KXC,
-                                                             ck::tensor_layout::convolution::NWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
-
-        // bf16
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
-                                                             ck::bhalf_t,
-                                                             ck::bhalf_t,
-                                                             ck::bhalf_t,
-                                                             ck::tensor_layout::convolution::NWC,
-                                                             ck::tensor_layout::convolution::KXC,
-                                                             ck::tensor_layout::convolution::NWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<1,
+                                                           ck::tensor_layout::convolution::NWC,
+                                                           ck::tensor_layout::convolution::KXC,
+                                                           ck::tensor_layout::convolution::NWK,
+                                                           ck::half_t,
+                                                           ck::half_t,
+                                                           ck::half_t>(true,  // do_verification
+                                                                       1,     // init_method
+                                                                       false, // do_log
+                                                                       false, // time_kernel
+                                                                       param,
+                                                                       2);
+
+        // bf16, wei is f32
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<1,
+                                                           ck::tensor_layout::convolution::NWC,
+                                                           ck::tensor_layout::convolution::KXC,
+                                                           ck::tensor_layout::convolution::NWK,
+                                                           ck::bhalf_t,
+                                                           float,
+                                                           ck::bhalf_t>(true,  // do_verification
+                                                                        1,     // init_method
+                                                                        false, // do_log
+                                                                        false, // time_kernel
+                                                                        param,
+                                                                        2);
     }
 
     // check 2d
...
@@ -102,80 +73,50 @@ int test_self()
     for(auto& param : params)
     {
-        // f32
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
-                                                             float,
-                                                             float,
-                                                             float,
-                                                             ck::tensor_layout::convolution::NHWC,
-                                                             ck::tensor_layout::convolution::KYXC,
-                                                             ck::tensor_layout::convolution::NHWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
+        // fp32
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
+                                                           ck::tensor_layout::convolution::NHWC,
+                                                           ck::tensor_layout::convolution::KYXC,
+                                                           ck::tensor_layout::convolution::NHWK,
+                                                           float,
+                                                           float,
+                                                           float>(true,  // do_verification
+                                                                  1,     // init_method
+                                                                  false, // do_log
+                                                                  false, // time_kernel
+                                                                  param,
+                                                                  2);
 
         // fp16
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
-                                                             ck::half_t,
-                                                             ck::half_t,
-                                                             ck::half_t,
-                                                             ck::tensor_layout::convolution::NHWC,
-                                                             ck::tensor_layout::convolution::KYXC,
-                                                             ck::tensor_layout::convolution::NHWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
-
-        // bf16
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
-                                                             ck::bhalf_t,
-                                                             ck::bhalf_t,
-                                                             ck::bhalf_t,
-                                                             ck::tensor_layout::convolution::NHWC,
-                                                             ck::tensor_layout::convolution::KYXC,
-                                                             ck::tensor_layout::convolution::NHWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
+                                                           ck::tensor_layout::convolution::NHWC,
+                                                           ck::tensor_layout::convolution::KYXC,
+                                                           ck::tensor_layout::convolution::NHWK,
+                                                           ck::half_t,
+                                                           ck::half_t,
+                                                           ck::half_t>(true,  // do_verification
+                                                                       1,     // init_method
+                                                                       false, // do_log
+                                                                       false, // time_kernel
+                                                                       param,
+                                                                       2);
+
+        // bf16, wei is f32
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
+                                                           ck::tensor_layout::convolution::NHWC,
+                                                           ck::tensor_layout::convolution::KYXC,
+                                                           ck::tensor_layout::convolution::NHWK,
+                                                           ck::bhalf_t,
+                                                           float,
+                                                           ck::bhalf_t>(true,  // do_verification
+                                                                        1,     // init_method
+                                                                        false, // do_log
+                                                                        false, // time_kernel
+                                                                        param,
+                                                                        2);
     }
 
-    // check 2d
+    // check 3d
     params.clear();
     params.push_back(
         {3, 128, 256, 256, {1, 1, 1}, {4, 4, 4}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
...
@@ -186,90 +127,49 @@ int test_self()
     for(auto& param : params)
     {
-        // f32
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
-                                                             float,
-                                                             float,
-                                                             float,
-                                                             ck::tensor_layout::convolution::NDHWC,
-                                                             ck::tensor_layout::convolution::KZYXC,
-                                                             ck::tensor_layout::convolution::NDHWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
+        // fp32
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<3,
+                                                           ck::tensor_layout::convolution::NDHWC,
+                                                           ck::tensor_layout::convolution::KZYXC,
+                                                           ck::tensor_layout::convolution::NDHWK,
+                                                           float,
+                                                           float,
+                                                           float>(true,  // do_verification
+                                                                  1,     // init_method
+                                                                  false, // do_log
+                                                                  false, // time_kernel
+                                                                  param,
+                                                                  2);
 
         // fp16
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
-                                                             ck::half_t,
-                                                             ck::half_t,
-                                                             ck::half_t,
-                                                             ck::tensor_layout::convolution::NDHWC,
-                                                             ck::tensor_layout::convolution::KZYXC,
-                                                             ck::tensor_layout::convolution::NDHWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
-
-        // bf16
-        pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
-                                                             ck::bhalf_t,
-                                                             ck::bhalf_t,
-                                                             ck::bhalf_t,
-                                                             ck::tensor_layout::convolution::NDHWC,
-                                                             ck::tensor_layout::convolution::KZYXC,
-                                                             ck::tensor_layout::convolution::NDHWK>(
-            true,  // do_verification
-            1,     // init_method
-            false, // do_log
-            true,  // time_kernel
-            param.N_,
-            param.K_,
-            param.C_,
-            param.input_spatial_lengths_,
-            param.filter_spatial_lengths_,
-            param.GetOutputSpatialLengths(),
-            param.conv_filter_strides_,
-            param.conv_filter_dilations_,
-            param.input_left_pads_,
-            param.input_right_pads_,
-            2);
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<3,
+                                                           ck::tensor_layout::convolution::NDHWC,
+                                                           ck::tensor_layout::convolution::KZYXC,
+                                                           ck::tensor_layout::convolution::NDHWK,
+                                                           ck::half_t,
+                                                           ck::half_t,
+                                                           ck::half_t>(true,  // do_verification
+                                                                       1,     // init_method
+                                                                       false, // do_log
+                                                                       false, // time_kernel
+                                                                       param,
+                                                                       2);
+
+        // bf16, wei is f32
+        pass &= ck::profiler::profile_conv_bwd_weight_impl<3,
+                                                           ck::tensor_layout::convolution::NDHWC,
+                                                           ck::tensor_layout::convolution::KZYXC,
+                                                           ck::tensor_layout::convolution::NDHWK,
+                                                           ck::bhalf_t,
+                                                           float,
+                                                           ck::bhalf_t>(true,  // do_verification
+                                                                        1,     // init_method
+                                                                        false, // do_log
+                                                                        false, // time_kernel
+                                                                        param,
+                                                                        2);
     }
 
-    return pass;
-}
-
-int main()
-{
-    // int data_type = 1;
-    // int init_method = 1;
-
-    bool pass = true;
-    pass      = test_self();
-
     if(pass)
     {
         std::cout << "test conv2d bwd weight : Pass" << std::endl;
...
@@ -278,6 +178,6 @@ int main()
     else
     {
         std::cout << "test conv2d bwd weight: Fail " << std::endl;
-        return -1;
+        return 1;
     }
 }
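The other half of the test rewrite is parameter plumbing: the ten loose arguments per call (param.N_, param.K_, param.C_, the spatial-length and padding vectors) collapse into one ConvParams value plus the trailing split-K argument. A rough reconstruction of such an aggregate, with the field order inferred from the initializer lists above (a sketch, not the exact ck::utils::conv::ConvParams):

```cpp
#include <cstddef>
#include <vector>

struct ConvParams
{
    int num_dim_spatial_;
    int N_; // batch
    int K_; // output channels
    int C_; // input channels
    std::vector<int> filter_spatial_lengths_;
    std::vector<int> input_spatial_lengths_;
    std::vector<int> conv_filter_strides_;
    std::vector<int> conv_filter_dilations_;
    std::vector<int> input_left_pads_;
    std::vector<int> input_right_pads_;

    // Standard convolution output size per spatial dimension.
    std::vector<int> GetOutputSpatialLengths() const
    {
        std::vector<int> out;
        for(std::size_t d = 0; d < input_spatial_lengths_.size(); ++d)
        {
            const int eff = conv_filter_dilations_[d] * (filter_spatial_lengths_[d] - 1) + 1;
            out.push_back((input_spatial_lengths_[d] + input_left_pads_[d] +
                           input_right_pads_[d] - eff) /
                              conv_filter_strides_[d] +
                          1);
        }
        return out;
    }
};
```

Read positionally, params.push_back({1, 128, 256, 256, {1}, {7}, {2}, {1}, {0}, {0}}) would then mean: rank 1, N=128, K=256, C=256, filter length 1, input length 7, stride 2, dilation 1, zero padding on both sides.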