Commit cba8f7f2 authored by Anthony Chang
Browse files

Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4

parents cc50b687 b653c5eb
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm_reduce.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_reduce.hpp"
#include "gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp" #include "ck/utility/common_header.hpp"
#include "device_prop.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "device_base.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "device_gemm.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "common_header.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/device_utility/device_prop.hpp"
#include "gemm_specialization.hpp" #include "ck/device_utility/kernel_launch.hpp"
#include "element_wise_operation.hpp"
#include "gridwise_gemm_dl_v1r3.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <array> #include <array>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp" #include "ck/utility/common_header.hpp"
#include "device_gemm_multiple_d.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "gridwise_gemm_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp"
#include "gemm_specialization.hpp" #include "ck/device_utility/device_prop.hpp"
#include "device_prop.hpp" #include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include "device_base.hpp" #include "device_base.hpp"
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm_reduce.hpp" #include "ck/utility/common_header.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_reduce.hpp"
#include "gridwise_gemm_reduce_xdl_cshuffle_v1.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "gemm_specialization.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_prop.hpp" #include "ck/utility/common_header.hpp"
#include "device_base.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "device_gemm.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "common_header.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp"
#include "gridwise_gemm_xdlops_v2r3.hpp" #include "ck/device_utility/device_prop.hpp"
#include "gemm_specialization.hpp" #include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm_bias.hpp" #include "ck/utility/common_header.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_bias.hpp"
#include "gridwise_gemm_xdlops_v3r2.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
#ifndef DEVICE_GEMM_XDL_C_SHUFFLE_BIAS_ACTIVATION_HPP // SPDX-License-Identifier: MIT
#define DEVICE_GEMM_XDL_C_SHUFFLE_BIAS_ACTIVATION_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm_bias_activation.hpp" #include "ck/utility/common_header.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_bias_activation.hpp"
#include "gridwise_gemm_xdlops_v3r2.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -513,4 +518,3 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation ...@@ -513,4 +518,3 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_GEMM_XDL_C_SHUFFLE_BIAS_ACTIVATION_ADD_HPP // SPDX-License-Identifier: MIT
#define DEVICE_GEMM_XDL_C_SHUFFLE_BIAS_ACTIVATION_ADD_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm_bias_activation_add.hpp" #include "ck/utility/common_header.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_bias_activation_add.hpp"
#include "gridwise_gemm_xdlops_v3r3.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -573,4 +578,3 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add ...@@ -573,4 +578,3 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm.hpp" #include "ck/utility/common_header.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "gridwise_gemm_xdl_cshuffle_v1.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp"
#include "device_prop.hpp" #include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
#ifndef DEVICE_GEMM_SPLITK_XDL_HPP // SPDX-License-Identifier: MIT
#define DEVICE_GEMM_SPLITK_XDL_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_base.hpp" #include "ck/utility/common_header.hpp"
#include "device_gemm.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "gridwise_gemm_xdlops_v2r4.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp"
#include "gemm_specialization.hpp" #include "ck/device_utility/device_prop.hpp"
#include "device_prop.hpp" #include "ck/device_utility/kernel_launch.hpp"
#ifndef CK_RUN_KERNEL_AND_TIME
#define CK_RUN_KERNEL_AND_TIME 1
#endif
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -639,4 +637,3 @@ struct DeviceGemmXdlSplitK ...@@ -639,4 +637,3 @@ struct DeviceGemmXdlSplitK
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_GEMM_XDL_SPLITK_C_SHUFFLE_HPP // SPDX-License-Identifier: MIT
#define DEVICE_GEMM_XDL_SPLITK_C_SHUFFLE_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_base.hpp" #include "ck/utility/common_header.hpp"
#include "device_gemm.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "gridwise_gemm_xdlops_v2r4r2.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp"
#include "gemm_specialization.hpp" #include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
#ifndef CK_RUN_KERNEL_AND_TIME
#define CK_RUN_KERNEL_AND_TIME 1
#endif
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -641,4 +640,3 @@ struct DeviceGemmXdlSplitKCShuffle ...@@ -641,4 +640,3 @@ struct DeviceGemmXdlSplitKCShuffle
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_GROUPED_GEMM_XDL_HPP // SPDX-License-Identifier: MIT
#define DEVICE_GROUPED_GEMM_XDL_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_base.hpp" #include "ck/utility/common_header.hpp"
#include "device_gemm.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "gridwise_gemm_xdlops_v2r3.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp"
#include "gemm_specialization.hpp" #include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -638,4 +641,3 @@ struct DeviceGroupedGemmXdl ...@@ -638,4 +641,3 @@ struct DeviceGroupedGemmXdl
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_POOL2D_FWD_HPP // SPDX-License-Identifier: MIT
#define DEVICE_POOL2D_FWD_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <array> #include <array>
#include "device_base.hpp"
#include "reduction_enums.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/utility/reduction_enums.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -35,4 +38,3 @@ using DevicePool2dFwdPtr = std::unique_ptr<DevicePool2dFwd<ReduceOpId>>; ...@@ -35,4 +38,3 @@ using DevicePool2dFwdPtr = std::unique_ptr<DevicePool2dFwd<ReduceOpId>>;
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_POOL2D_FWD_NHWC_NHWC_HPP // SPDX-License-Identifier: MIT
#define DEVICE_POOL2D_FWD_NHWC_NHWC_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device_pool2d_fwd.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "reduction_operator_mapping.hpp" #include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "gridwise_2d_reduction_threadwise.hpp" #include "ck/tensor_operation/gpu/device/device_pool2d_fwd.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -315,9 +320,8 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd ...@@ -315,9 +320,8 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
return str.str(); return str.str();
} }
}; // namespace device };
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_REDUCE_HPP // SPDX-License-Identifier: MIT
#define DEVICE_REDUCE_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <iostream> #include <iostream>
#include "common_header.hpp" #include "ck/utility/common_header.hpp"
#include "device_base.hpp" #include "ck/utility/reduction_enums.hpp"
#include "reduction_enums.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -41,4 +43,3 @@ using DeviceReducePtr = ...@@ -41,4 +43,3 @@ using DeviceReducePtr =
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_REDUCE_COMMON_HPP // SPDX-License-Identifier: MIT
#define DEVICE_REDUCE_COMMON_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector> #include <vector>
#include <cassert> #include <cassert>
#include "common_header.hpp" #include "ck/utility/common_header.hpp"
#include "reduction_enums.hpp" #include "ck/utility/reduction_enums.hpp"
#include "reduction_operator.hpp" #include "ck/utility/reduction_operator.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -85,6 +87,4 @@ std::vector<index_t> shuffle_tensor_dimensions(const std::vector<index_t>& origL ...@@ -85,6 +87,4 @@ std::vector<index_t> shuffle_tensor_dimensions(const std::vector<index_t>& origL
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_REDUCE_MULTIBLOCK_HPP // SPDX-License-Identifier: MIT
#define DEVICE_REDUCE_MULTIBLOCK_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_base.hpp" #include "ck/utility/common_header.hpp"
#include "device_reduce.hpp" #include "ck/utility/reduction_operator.hpp"
#include "device_reduce_common.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "gridwise_2d_reduction_multiblock.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "gridwise_set_buffer_value.hpp" #include "ck/tensor_operation/gpu/device/device_reduce.hpp"
#include "reduction_operator.hpp" #include "ck/tensor_operation/gpu/device/device_reduce_common.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -390,10 +396,8 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InElementwiseOperation, AccE ...@@ -390,10 +396,8 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InElementwiseOperation, AccE
}; };
}; };
bool IsSupportedArgument(const BaseArgument* p_arg) override static bool IsSupportedArgument(const Argument* pArg)
{ {
const Argument* pArg = dynamic_cast<const Argument*>(p_arg);
if constexpr(use_multiblock) if constexpr(use_multiblock)
{ {
if(static_cast<float>(pArg->beta_) != 0.0f) if(static_cast<float>(pArg->beta_) != 0.0f)
...@@ -442,11 +446,16 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InElementwiseOperation, AccE ...@@ -442,11 +446,16 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InElementwiseOperation, AccE
else else
{ {
// cases with very small reduce_total_length should be handled by ThreadWise kernel // cases with very small reduce_total_length should be handled by ThreadWise kernel
if(pArg->reduce_total_length / KThreadSliceSize < 2) // if(pArg->reduce_total_length / KThreadSliceSize < 2)
return (false); // return (false);
}; };
return (true); return (true);
}
bool IsSupportedArgument(const BaseArgument* p_arg) override
{
return IsSupportedArgument(dynamic_cast<const Argument*>(p_arg));
}; };
std::unique_ptr<BaseArgument> std::unique_ptr<BaseArgument>
...@@ -502,4 +511,3 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InElementwiseOperation, AccE ...@@ -502,4 +511,3 @@ struct DeviceReduceMultiBlock : public DeviceReduce<InElementwiseOperation, AccE
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
#ifndef DEVICE_REDUCE_THREADWISE_HPP // SPDX-License-Identifier: MIT
#define DEVICE_REDUCE_THREADWISE_HPP // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_reduce.hpp" #include "ck/device_utility/device_prop.hpp"
#include "device_reduce_common.hpp" #include "ck/device_utility/kernel_launch.hpp"
#include "gridwise_2d_reduction_multiblock.hpp" #include "ck/tensor_operation/gpu/device/device_reduce.hpp"
#include "gridwise_2d_reduction_threadwise.hpp" #include "ck/tensor_operation/gpu/device/device_reduce_common.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -370,4 +374,3 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, AccE ...@@ -370,4 +374,3 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, AccE
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
#endif
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment