Unverified Commit 36c9a825 authored by Bartłomiej Kocot's avatar Bartłomiej Kocot Committed by GitHub
Browse files

Merge branch 'develop' into barkocot/grouped-conv-fwd-multiple-ab

parents 25e4687b 68f2b5e7
...@@ -28,7 +28,8 @@ template <typename XDataType, ...@@ -28,7 +28,8 @@ template <typename XDataType,
struct ReferenceLayernorm : public device::BaseOperator struct ReferenceLayernorm : public device::BaseOperator
{ {
// TODO - support generic layernorm // TODO - support generic layernorm
static_assert((Rank == 2 && NumReduceDim == 1), "Only support 2D version so far"); static_assert((Rank == 2 && NumReduceDim == 1) || (Rank == 4 && NumReduceDim == 3),
"Only support 2D & 4D version so far");
// Argument // Argument
struct Argument : public device::BaseArgument struct Argument : public device::BaseArgument
...@@ -71,7 +72,7 @@ struct ReferenceLayernorm : public device::BaseOperator ...@@ -71,7 +72,7 @@ struct ReferenceLayernorm : public device::BaseOperator
// Invoker // Invoker
struct Invoker : public device::BaseInvoker struct Invoker : public device::BaseInvoker
{ {
float Run(const Argument& arg) float Run2D(const Argument& arg)
{ {
int M = arg.lengths_[0]; int M = arg.lengths_[0];
int N = arg.lengths_[1]; int N = arg.lengths_[1];
...@@ -117,6 +118,71 @@ struct ReferenceLayernorm : public device::BaseOperator ...@@ -117,6 +118,71 @@ struct ReferenceLayernorm : public device::BaseOperator
return 0; return 0;
} }
float Run4D(const Argument& arg)
{
int N = arg.lengths_[0];
int H = arg.lengths_[1];
int W = arg.lengths_[2];
int C = arg.lengths_[3];
Tensor<ComputeDataType> mean({N});
Tensor<ComputeDataType> var({N});
int reduce_length = H * W * C;
for(int n = 0; n < N; ++n)
{
mean(n) = 0;
var(n) = 0;
for(int h = 0; h < H; ++h)
for(int w = 0; w < W; ++w)
for(int c = 0; c < C; ++c)
{
auto x_val = ck::type_convert<ComputeDataType>(arg.x_m_n_(n, h, w, c));
mean(n) += x_val;
var(n) += x_val * x_val;
}
mean(n) = mean(n) / reduce_length;
var(n) = (var(n) / reduce_length) - (mean(n) * mean(n));
}
for(int n = 0; n < N; ++n)
{
ComputeDataType divisor =
static_cast<ComputeDataType>(1) / ck::math::sqrt(var(n) + arg.epsilon_);
for(int h = 0; h < H; ++h)
for(int w = 0; w < W; ++w)
for(int c = 0; c < C; ++c)
{
auto x_val = ck::type_convert<ComputeDataType>(arg.x_m_n_(n, h, w, c));
auto gamma_val =
ck::type_convert<ComputeDataType>(arg.gamma_n_(h, w, c));
auto beta_val = ck::type_convert<ComputeDataType>(arg.beta_n_(h, w, c));
auto y_val = (x_val - mean(n)) * divisor;
y_val = (y_val * gamma_val) + beta_val;
arg.y_elementwise_op_(y_val, y_val);
arg.y_m_n_(n, h, w, c) = ck::type_convert<YDataType>(y_val);
}
arg.save_mean_m_(n) = ck::type_convert<SaveMeanInvStdDataType>(mean(n));
arg.save_inv_std_m_(n) = ck::type_convert<SaveMeanInvStdDataType>(divisor);
}
return 0;
}
float Run(const Argument& arg)
{
if(arg.lengths_.size() == 2)
return Run2D(arg);
else if(arg.lengths_.size() == 4)
return Run4D(arg);
return 0;
}
float Run(const device::BaseArgument* p_arg, float Run(const device::BaseArgument* p_arg,
const StreamConfig& /* stream_config */ = StreamConfig{}) override const StreamConfig& /* stream_config */ = StreamConfig{}) override
{ {
...@@ -134,17 +200,16 @@ struct ReferenceLayernorm : public device::BaseOperator ...@@ -134,17 +200,16 @@ struct ReferenceLayernorm : public device::BaseOperator
{ {
const Argument* p_arg_ = dynamic_cast<const Argument*>(p_arg); const Argument* p_arg_ = dynamic_cast<const Argument*>(p_arg);
// TODO - support generic layernorm if(p_arg_->lengths_.size() == 2 && p_arg_->reduceDims_.size() == 1 &&
if(p_arg_->lengths_.size() != 2) p_arg_->reduceDims_[0] == 1)
return false; return true;
if(p_arg_->reduceDims_.size() != 1) else if(p_arg_->lengths_.size() == 4 && p_arg_->reduceDims_.size() == 3 &&
return false; p_arg_->reduceDims_[0] == 1 && p_arg_->reduceDims_[1] == 2 &&
p_arg_->reduceDims_[2] == 3)
return true;
if(p_arg_->reduceDims_[0] != 1)
return false; return false;
return true;
} }
static auto MakeArgument(const Tensor<XDataType>& x_m_n, static auto MakeArgument(const Tensor<XDataType>& x_m_n,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <memory> #include <memory>
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_normalization.hpp" #include "ck/tensor_operation/gpu/device/device_normalization_fwd.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp" #include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
...@@ -18,25 +18,31 @@ namespace device { ...@@ -18,25 +18,31 @@ namespace device {
namespace instance { namespace instance {
#ifdef CK_ENABLE_FP16 #ifdef CK_ENABLE_FP16
// FP16 // FP16
void add_device_normalization_rank_2_1_f16_instances( void add_device_normalization_fwd_rank_2_1_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, PassThrough, 2, 1>>>&); std::vector<
std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, PassThrough, 2, 1>>>&);
void add_device_normalization_rank_4_3_f16_instances( void add_device_normalization_fwd_rank_4_3_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, PassThrough, 4, 3>>>&); std::vector<
std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, PassThrough, 4, 3>>>&);
void add_device_normalization_rank_5_3_f16_instances( void add_device_normalization_fwd_rank_5_3_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, PassThrough, 5, 3>>>&); std::vector<
std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, PassThrough, 5, 3>>>&);
#endif #endif
#ifdef CK_ENABLE_FP32 #ifdef CK_ENABLE_FP32
// FP32 // FP32
void add_device_normalization_rank_2_1_f32_instances( void add_device_normalization_fwd_rank_2_1_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, PassThrough, 2, 1>>>&); std::vector<
std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, PassThrough, 2, 1>>>&);
void add_device_normalization_rank_4_3_f32_instances( void add_device_normalization_fwd_rank_4_3_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, PassThrough, 4, 3>>>&); std::vector<
std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, PassThrough, 4, 3>>>&);
void add_device_normalization_rank_5_3_f32_instances( void add_device_normalization_fwd_rank_5_3_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, PassThrough, 5, 3>>>&); std::vector<
std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, PassThrough, 5, 3>>>&);
#endif #endif
template <typename XDataType, template <typename XDataType,
typename GammaDataType, typename GammaDataType,
...@@ -45,7 +51,7 @@ template <typename XDataType, ...@@ -45,7 +51,7 @@ template <typename XDataType,
typename SaveMeanInvStdDataType, typename SaveMeanInvStdDataType,
index_t Rank, index_t Rank,
index_t NumReduceDim> index_t NumReduceDim>
struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormalization< struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormalizationFwd<
XDataType, XDataType,
GammaDataType, GammaDataType,
BetaDataType, BetaDataType,
...@@ -55,7 +61,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormal ...@@ -55,7 +61,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormal
Rank, Rank,
NumReduceDim>> NumReduceDim>>
{ {
using DeviceOp = DeviceNormalization<XDataType, using DeviceOp = DeviceNormalizationFwd<XDataType,
GammaDataType, GammaDataType,
BetaDataType, BetaDataType,
YDataType, YDataType,
...@@ -74,15 +80,15 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormal ...@@ -74,15 +80,15 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormal
{ {
if constexpr(Rank == 2 && NumReduceDim == 1) if constexpr(Rank == 2 && NumReduceDim == 1)
{ {
add_device_normalization_rank_2_1_f16_instances(op_ptrs); add_device_normalization_fwd_rank_2_1_f16_instances(op_ptrs);
} }
else if constexpr(Rank == 4 && NumReduceDim == 3) else if constexpr(Rank == 4 && NumReduceDim == 3)
{ {
add_device_normalization_rank_4_3_f16_instances(op_ptrs); add_device_normalization_fwd_rank_4_3_f16_instances(op_ptrs);
} }
else if constexpr(Rank == 5 && NumReduceDim == 3) else if constexpr(Rank == 5 && NumReduceDim == 3)
{ {
add_device_normalization_rank_5_3_f16_instances(op_ptrs); add_device_normalization_fwd_rank_5_3_f16_instances(op_ptrs);
} }
} }
#endif #endif
...@@ -93,15 +99,15 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormal ...@@ -93,15 +99,15 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceNormal
{ {
if constexpr(Rank == 2 && NumReduceDim == 1) if constexpr(Rank == 2 && NumReduceDim == 1)
{ {
add_device_normalization_rank_2_1_f32_instances(op_ptrs); add_device_normalization_fwd_rank_2_1_f32_instances(op_ptrs);
} }
else if constexpr(Rank == 4 && NumReduceDim == 3) else if constexpr(Rank == 4 && NumReduceDim == 3)
{ {
add_device_normalization_rank_4_3_f32_instances(op_ptrs); add_device_normalization_fwd_rank_4_3_f32_instances(op_ptrs);
} }
else if constexpr(Rank == 5 && NumReduceDim == 3) else if constexpr(Rank == 5 && NumReduceDim == 3)
{ {
add_device_normalization_rank_5_3_f32_instances(op_ptrs); add_device_normalization_fwd_rank_5_3_f32_instances(op_ptrs);
} }
} }
#endif #endif
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_normalization.hpp" #include "ck/tensor_operation/gpu/device/device_normalization_fwd.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp" #include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
...@@ -18,16 +18,16 @@ namespace device { ...@@ -18,16 +18,16 @@ namespace device {
namespace instance { namespace instance {
// FP16 // FP16
void add_device_normalization_rank_5_3_swish_f16_instances( void add_device_normalization_fwd_rank_5_3_swish_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, Swish, 5, 3>>>&); std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, Swish, 5, 3>>>&);
// FP32 // FP32
void add_device_normalization_rank_5_3_swish_f32_instances( void add_device_normalization_fwd_rank_5_3_swish_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Swish, 5, 3>>>&); std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Swish, 5, 3>>>&);
// [x, gamma, beta, y] = [f16, f32, f32, f16] // [x, gamma, beta, y] = [f16, f32, f32, f16]
void add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances( void add_device_normalization_fwd_rank_5_3_swish_f16_f32_f32_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F32, F32, F16, F32, Swish, 5, 3>>>&); std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F32, F32, F16, F32, Swish, 5, 3>>>&);
template <typename XDataType, template <typename XDataType,
typename GammaDataType, typename GammaDataType,
...@@ -37,7 +37,7 @@ template <typename XDataType, ...@@ -37,7 +37,7 @@ template <typename XDataType,
index_t Rank, index_t Rank,
index_t NumReduceDim> index_t NumReduceDim>
struct DeviceOperationInstanceFactory< struct DeviceOperationInstanceFactory<
ck::tensor_operation::device::DeviceNormalization<XDataType, ck::tensor_operation::device::DeviceNormalizationFwd<XDataType,
GammaDataType, GammaDataType,
BetaDataType, BetaDataType,
YDataType, YDataType,
...@@ -46,7 +46,7 @@ struct DeviceOperationInstanceFactory< ...@@ -46,7 +46,7 @@ struct DeviceOperationInstanceFactory<
Rank, Rank,
NumReduceDim>> NumReduceDim>>
{ {
using DeviceOp = DeviceNormalization<XDataType, using DeviceOp = DeviceNormalizationFwd<XDataType,
GammaDataType, GammaDataType,
BetaDataType, BetaDataType,
YDataType, YDataType,
...@@ -65,7 +65,7 @@ struct DeviceOperationInstanceFactory< ...@@ -65,7 +65,7 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(Rank == 5 && NumReduceDim == 3) if constexpr(Rank == 5 && NumReduceDim == 3)
{ {
add_device_normalization_rank_5_3_swish_f16_instances(op_ptrs); add_device_normalization_fwd_rank_5_3_swish_f16_instances(op_ptrs);
} }
} }
else if constexpr(is_same_v<XDataType, F32> && is_same_v<GammaDataType, F32> && else if constexpr(is_same_v<XDataType, F32> && is_same_v<GammaDataType, F32> &&
...@@ -74,7 +74,7 @@ struct DeviceOperationInstanceFactory< ...@@ -74,7 +74,7 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(Rank == 5 && NumReduceDim == 3) if constexpr(Rank == 5 && NumReduceDim == 3)
{ {
add_device_normalization_rank_5_3_swish_f32_instances(op_ptrs); add_device_normalization_fwd_rank_5_3_swish_f32_instances(op_ptrs);
} }
} }
else if constexpr(is_same_v<XDataType, F16> && is_same_v<GammaDataType, F32> && else if constexpr(is_same_v<XDataType, F16> && is_same_v<GammaDataType, F32> &&
...@@ -83,7 +83,7 @@ struct DeviceOperationInstanceFactory< ...@@ -83,7 +83,7 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(Rank == 5 && NumReduceDim == 3) if constexpr(Rank == 5 && NumReduceDim == 3)
{ {
add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances(op_ptrs); add_device_normalization_fwd_rank_5_3_swish_f16_f32_f32_f16_instances(op_ptrs);
} }
} }
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using F16 = ck::half_t;
using F32 = float;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using device_transpose_f16_instances = std::tuple<
// FOR 16, 32, 16, 32, 16
// clang-format off
DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, PassThrough, 2, 2, 1, 8, 8, 8, ck::Sequence<1>, ck::Sequence<1>>,
DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, PassThrough, 2, 2, 1, 8, 1, 1, ck::Sequence<1>, ck::Sequence<1>>,
DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, PassThrough, 2, 2, 1, 8, 4, 4, ck::Sequence<1>, ck::Sequence<1>>
// clang-format on
>;
using device_transpose_f32_instances = std::tuple<
// for 16, 8, 16, 32, 8 -> test with instances for fp16
// clang-format off
DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, PassThrough, 2, 2, 1, 4, 4, 4, ck::Sequence<1>, ck::Sequence<1>>,
DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, PassThrough, 2, 2, 1, 4, 8, 4, ck::Sequence<1>, ck::Sequence<1>>,
DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, PassThrough, 2, 2, 1, 4, 8, 8, ck::Sequence<1>, ck::Sequence<1>>
// clang-format on
>;
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using F16 = ck::half_t;
using F32 = float;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
void add_device_transpose_f16_instances(
std::vector<std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, PassThrough, 5>>>&
instances);
void add_device_transpose_f32_instances(
std::vector<std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, PassThrough, 5>>>&
instances);
template <typename InDataTypeTuple,
typename OutDataTypeTuple,
typename ElementwiseOperation,
index_t NumDim>
struct DeviceOperationInstanceFactory<
ck::tensor_operation::device::
DeviceElementwise<InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim>>
{
using DeviceOp =
DeviceElementwise<InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim>;
static auto GetInstances()
{
std::vector<std::unique_ptr<DeviceOp>> op_ptrs;
if constexpr(is_same_v<InDataTypeTuple, ck::Tuple<F32>> &&
is_same_v<OutDataTypeTuple, ck::Tuple<F32>>)
{
add_device_transpose_f32_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataTypeTuple, ck::Tuple<F16>> &&
is_same_v<OutDataTypeTuple, ck::Tuple<F16>>)
{
add_device_transpose_f16_instances(op_ptrs);
}
return op_ptrs;
}
};
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
set(DEVICE_NORMALIZATION_INSTANCES)
list(APPEND DEVICE_NORMALIZATION_INSTANCES
device_layernorm2d_f16_instance.cpp
device_layernorm4d_f16_instance.cpp
device_groupnorm_f16_instance.cpp
device_groupnorm_swish_f16_instance.cpp
device_groupnorm_swish_f16_f32_f32_f16_instance.cpp
device_layernorm2d_f32_instance.cpp
device_layernorm4d_f32_instance.cpp
device_groupnorm_f32_instance.cpp
device_groupnorm_swish_f32_instance.cpp)
add_instance_library(device_normalization_instance ${DEVICE_NORMALIZATION_INSTANCES})
set(DEVICE_NORMALIZATION_FWD_INSTANCES)
list(APPEND DEVICE_NORMALIZATION_FWD_INSTANCES
device_layernorm2d_fwd_f16_instance.cpp
device_layernorm4d_fwd_f16_instance.cpp
device_groupnorm_fwd_f16_instance.cpp
device_groupnorm_fwd_swish_f16_instance.cpp
device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp
device_layernorm2d_fwd_f32_instance.cpp
device_layernorm4d_fwd_f32_instance.cpp
device_groupnorm_fwd_f32_instance.cpp
device_groupnorm_fwd_swish_f32_instance.cpp)
add_instance_library(device_normalization_fwd_instance ${DEVICE_NORMALIZATION_FWD_INSTANCES})
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Pass = ck::tensor_operation::element_wise::PassThrough; using Pass = ck::tensor_operation::element_wise::PassThrough;
void add_device_normalization_rank_5_3_f16_instances( void add_device_normalization_fwd_rank_5_3_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, Pass, 5, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, Pass, 5, 3>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Pass = ck::tensor_operation::element_wise::PassThrough; using Pass = ck::tensor_operation::element_wise::PassThrough;
void add_device_normalization_rank_5_3_f32_instances( void add_device_normalization_fwd_rank_5_3_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Pass, 5, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Pass, 5, 3>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Swish = ck::tensor_operation::element_wise::Swish; using Swish = ck::tensor_operation::element_wise::Swish;
void add_device_normalization_rank_5_3_swish_f16_f32_f32_f16_instances( void add_device_normalization_fwd_rank_5_3_swish_f16_f32_f32_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F32, F32, F16, F32, Swish, 5, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F32, F32, F16, F32, Swish, 5, 3>>>&
instances) instances)
{ {
add_device_operation_instances( add_device_operation_instances(
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Swish = ck::tensor_operation::element_wise::Swish; using Swish = ck::tensor_operation::element_wise::Swish;
void add_device_normalization_rank_5_3_swish_f16_instances( void add_device_normalization_fwd_rank_5_3_swish_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, Swish, 5, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, Swish, 5, 3>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Swish = ck::tensor_operation::element_wise::Swish; using Swish = ck::tensor_operation::element_wise::Swish;
void add_device_normalization_rank_5_3_swish_f32_instances( void add_device_normalization_fwd_rank_5_3_swish_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Swish, 5, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Swish, 5, 3>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Pass = ck::tensor_operation::element_wise::PassThrough; using Pass = ck::tensor_operation::element_wise::PassThrough;
void add_device_normalization_rank_2_1_f16_instances( void add_device_normalization_fwd_rank_2_1_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, Pass, 2, 1>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, Pass, 2, 1>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Pass = ck::tensor_operation::element_wise::PassThrough; using Pass = ck::tensor_operation::element_wise::PassThrough;
void add_device_normalization_rank_2_1_f32_instances( void add_device_normalization_fwd_rank_2_1_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Pass, 2, 1>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Pass, 2, 1>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Pass = ck::tensor_operation::element_wise::PassThrough; using Pass = ck::tensor_operation::element_wise::PassThrough;
void add_device_normalization_rank_4_3_f16_instances( void add_device_normalization_fwd_rank_4_3_f16_instances(
std::vector<std::unique_ptr<DeviceNormalization<F16, F16, F16, F16, F32, Pass, 4, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F16, F16, F16, F16, F32, Pass, 4, 3>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "normalization_instance_common.hpp" #include "normalization_fwd_instance_common.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -10,8 +10,8 @@ namespace instance { ...@@ -10,8 +10,8 @@ namespace instance {
using Pass = ck::tensor_operation::element_wise::PassThrough; using Pass = ck::tensor_operation::element_wise::PassThrough;
void add_device_normalization_rank_4_3_f32_instances( void add_device_normalization_fwd_rank_4_3_f32_instances(
std::vector<std::unique_ptr<DeviceNormalization<F32, F32, F32, F32, F32, Pass, 4, 3>>>& std::vector<std::unique_ptr<DeviceNormalizationFwd<F32, F32, F32, F32, F32, Pass, 4, 3>>>&
instances) instances)
{ {
add_device_operation_instances(instances, add_device_operation_instances(instances,
......
add_instance_library(device_transpose_instance
device_transpose_instances_3d.cpp
)
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using F16 = ck::half_t;
using F32 = float;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
void add_device_transpose_f16_instances(
std::vector<std::unique_ptr<DeviceElementwise<ck::Tuple<F16>, ck::Tuple<F16>, PassThrough, 5>>>&
instances)
{
#ifdef CK_ENABLE_FP16
add_device_operation_instances(instances, device_transpose_f16_instances{});
#else
ignore = instances;
#endif
}
void add_device_transpose_f32_instances(
std::vector<std::unique_ptr<DeviceElementwise<ck::Tuple<F32>, ck::Tuple<F32>, PassThrough, 5>>>&
instances)
{
#ifdef CK_ENABLE_FP32
add_device_operation_instances(instances, device_transpose_f32_instances{});
#else
ignore = instances;
#endif
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment