Merge remote-tracking branch 'origin/develop' into batched_gemm_c_permute

0b11569f · Chao Liu · e8d3a0fb · fa9a0a5c · 0b11569f · 0b11569f
Commit 0b11569f authored Jul 01, 2022 by Chao Liu
20 changed files
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
-add_subdirectory(src/host_tensor)
 add_subdirectory(src/tensor_operation_instance/gpu)
+add_subdirectory(src/host_tensor)
 add_subdirectory(src/utility)
--- a/library/include/ck/library/host_tensor/conv_common.hpp
+++ b/library/include/ck/library/host_tensor/conv_common.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include "ck/tensor_description/tensor_descriptor.hpp"

--- a/library/include/ck/library/host_tensor/device_memory.hpp
+++ b/library/include/ck/library/host_tensor/device_memory.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <hip/hip_runtime.h>

--- a/library/include/ck/library/host_tensor/host_common_util.hpp
+++ b/library/include/ck/library/host_tensor/host_common_util.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <vector>

--- a/library/include/ck/library/host_tensor/host_conv.hpp
+++ b/library/include/ck/library/host_tensor/host_conv.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include "host_tensor.hpp"
 #include "conv_common.hpp"

--- a/library/include/ck/library/host_tensor/host_gemm.hpp
+++ b/library/include/ck/library/host_tensor/host_gemm.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include "host_tensor.hpp"

--- a/library/include/ck/library/host_tensor/host_reduction.hpp
+++ b/library/include/ck/library/host_tensor/host_reduction.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <vector>

--- a/library/include/ck/library/host_tensor/host_tensor.hpp
+++ b/library/include/ck/library/host_tensor/host_tensor.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <thread>
@@ -219,6 +222,12 @@ struct Tensor
    Tensor(const Tensor& other) : mDesc(other.mDesc), mData(other.mData) {}
+    Tensor& operator=(const Tensor& other)
+    {
+        mDesc = other.mDesc;
+        mData = other.mData;
+    }
    template <typename F>
    void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank)
    {
@@ -361,13 +370,8 @@ HostTensorDescriptor::HostTensorDescriptor(const std::vector<X>& lens,
 {
 }
-void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream& os = std::cout);
 #if 1
 // FIXME: remove
-void bf16_to_f32_(const Tensor<ck::bhalf_t>& src, Tensor<float>& dst);
-#endif
 template <typename T>
 float check_error(const Tensor<T>& ref, const Tensor<T>& result)
 {
@@ -413,3 +417,4 @@ float check_error(const Tensor<T>& ref, const Tensor<T>& result)
    return linf_error;
 }
+#endif
--- a/library/include/ck/library/host_tensor/host_tensor_generator.hpp
+++ b/library/include/ck/library/host_tensor/host_tensor_generator.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <cmath>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>
@@ -59,20 +62,20 @@ struct ReferenceBatchedGemm : public device::BaseOperator
                for(int k = 0; k < K; ++k)
                {
-                    float v_a;
+                    ADataType v_a;
-                    float v_b;
+                    BDataType v_b;
-                    arg.a_element_op_(v_a, static_cast<const float>(arg.a_g_m_k_(g, m, k)));
+                    arg.a_element_op_(v_a, arg.a_g_m_k_(g, m, k));
-                    arg.b_element_op_(v_b, static_cast<const float>(arg.b_g_k_n_(g, k, n)));
+                    arg.b_element_op_(v_b, arg.b_g_k_n_(g, k, n));
-                    v_acc += v_a * v_b;
+                    v_acc += ck::type_convert<float>(v_a) * ck::type_convert<float>(v_b);
                }
                float v_c;
                arg.c_element_op_(v_c, v_acc);
-                arg.c_g_m_n_(g, m, n) = v_c;
+                arg.c_g_m_n_(g, m, n) = ck::type_convert<CDataType>(v_c);
            };
            make_ParallelTensorFunctor(f_gmk_gkn_gmn,

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>
@@ -60,20 +63,21 @@ struct ReferenceGemm : public device::BaseOperator
                for(int k = 0; k < K; ++k)
                {
-                    AccDataType v_a;
+                    ADataType v_a;
-                    AccDataType v_b;
+                    BDataType v_b;
-                    arg.a_element_op_(v_a, static_cast<const AccDataType>(arg.a_m_k_(m, k)));
+                    arg.a_element_op_(v_a, arg.a_m_k_(m, k));
-                    arg.b_element_op_(v_b, static_cast<const AccDataType>(arg.b_k_n_(k, n)));
+                    arg.b_element_op_(v_b, arg.b_k_n_(k, n));
-                    v_acc += v_a * v_b;
+                    v_acc +=
+                        ck::type_convert<AccDataType>(v_a) * ck::type_convert<AccDataType>(v_b);
                }
                AccDataType v_c;
                arg.c_element_op_(v_c, v_acc);
-                arg.c_m_n_(m, n) = v_c;
+                arg.c_m_n_(m, n) = ck::type_convert<CDataType>(v_c);
            };
            make_ParallelTensorFunctor(

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>

--- a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation_add.hpp
+++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation_add.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
 #include <iostream>