Tiny fix to the data type template parameters used in the blockwise and direct_threadwise kernels

a18e6481 · Qianfeng Zhang · 9e80cdce · a18e6481 · a18e6481
Commit a18e6481 authored Sep 15, 2021 by Qianfeng Zhang
2 changed files
--- a/composable_kernel/include/tensor_operation/gridwise_generic_2d_reduction_blockwise.hpp
+++ b/composable_kernel/include/tensor_operation/gridwise_generic_2d_reduction_blockwise.hpp
@@ -281,7 +281,7 @@ struct GridwiseReduction_xy_to_x_blockwise
                                            ThreadClusterLengths,
                                            Sequence<0, 1>,
                                            srcDataType,
-                                            dstDataType,
+                                            compType,
                                            src2dDescType,
                                            decltype(in_block_desc),
                                            Sequence<0, 1>,

--- a/composable_kernel/include/tensor_operation/gridwise_generic_2d_reduction_direct_threadwise.hpp
+++ b/composable_kernel/include/tensor_operation/gridwise_generic_2d_reduction_direct_threadwise.hpp
@@ -232,7 +232,7 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
        index_t thread_global_1d_id = get_block_1d_id() * BlockSize + get_thread_local_1d_id();
        auto threadwise_src_load = ThreadwiseTensorSliceTransfer_v2<srcDataType,
-                                                                    dstDataType,
+                                                                    compType,
                                                                    src2dDescType,
                                                                    decltype(ThreadBufferDesc),
                                                                    ThreadBufferLengths,
@@ -377,7 +377,7 @@ struct GridwiseReduction_xy_to_x_direct_threadwise
        index_t thread_global_1d_id = get_block_1d_id() * BlockSize + get_thread_local_1d_id();
        auto threadwise_src_val_load = ThreadwiseTensorSliceTransfer_v2<srcDataType,
-                                                                        dstDataType,
+                                                                        compType,
                                                                        src2dDescType,
                                                                        decltype(ThreadBufferDesc),
                                                                        ThreadBufferLengths,