"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "4361cffd85e94a6831ce20c49e11be4e7deeef76"
Commit e9ecf8d1 authored by Astha Rai
Browse files

fixed errors in client example

parent 11001fa3
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Short element-type names used by the instance lists below.
using F32 = float;
using F16 = ck::half_t;

// S<...> is shorthand for ck::Sequence<...>.
template <ck::index_t... Values>
using S = ck::Sequence<Values...>;
// FP16 transpose candidates, collected as a single std::tuple type with one
// DeviceElementwise3dImpl specialization per row.
//
// Every row passes, in order: the input type tuple, the output type tuple, six
// integer arguments and two ck::Sequence arguments.
// NOTE(review): the integers look like NumDim_m, NumDim_n, NumDim_k, MPerThread,
// NPerThread, KPerThread, followed by the input/output scalar-per-vector
// sequences -- confirm against the DeviceElementwise3dImpl declaration.
using device_transpose_f16_instances = std::tuple<
    // clang-format off FOR 16, 32, 16, 32, 16
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<8>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<1>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 1, 1, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 1, 1, ck::Sequence<8>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 4, 4, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 4, 4, ck::Sequence<8>, ck::Sequence<8>>
    // clang-format on
    >;
// FP32 transpose candidates, collected as a single std::tuple type with one
// DeviceElementwise3dImpl specialization per row.
//
// Every row passes, in order: the input type tuple, the output type tuple, six
// integer arguments and two ck::Sequence arguments.
// NOTE(review): the integers look like NumDim_m, NumDim_n, NumDim_k, MPerThread,
// NPerThread, KPerThread, followed by the input/output scalar-per-vector
// sequences -- confirm against the DeviceElementwise3dImpl declaration.
//
// The final row must not end with a comma: a template-argument-list cannot
// have a trailing comma, so `...>>, >;` does not compile.
using device_transpose_f32_instances = std::tuple<
    // clang-format off // for 16, 8, 16, 32, 8 -> test with instances for fp16
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 4, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 4, ck::Sequence<8>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 4, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 4, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<4>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<4>, ck::Sequence<4>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<8>, ck::Sequence<4>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 8, ck::Sequence<8>, ck::Sequence<8>>
    // clang-format on
    >;
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <vector> #include <vector>
#include <memory> #include <memory>
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp" #include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
using namespace ck::transpose_op; void add_device_transpose_f16_instances(
std::vector<std::unique_ptr<DeviceElementwise3dImpl<F16, F16, NCDHW, 3>>>& instances);
void add_device_transpose_f16_instances(
std::vector<std::unique_ptr<DeviceElementwise3dImpl<F16, F16, NCDHW, 3>>>& instances); void add_device_transpose_f32_instances(
std::vector<std::unique_ptr<DeviceElementwise3dImpl<F32, F32, NCDHW, 3>>>& instances);
void add_device_transpose_f32_instances(
std::vector<std::unique_ptr<DeviceElementwise3dImpl<F32, F32, NCDHW, 3>>>& instances); template <typename InDataTypeTuple,
typename OutDataTypeTuple,
template <typename InDataTypeTuple, typename ElementwiseOperation,
typename OutDataTypeTuple, index_t NumDim>
typename ElementwiseOperation, struct DeviceOperationInstanceFactory<
index_t NumDim> ck::tensor_operation::device::
struct DeviceOperationInstanceFactory< DeviceElementwise3dImpl<InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim>>
ck::tensor_operation::device:: {
DeviceElementwise3dImpl<InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim>> using DeviceOp = DeviceElementwise3dImpl<InDataTypeTuple,
{ OutDataTypeTuple,
using DeviceOp = DeviceElementwise3dImpl<InDataTypeTuple, ElementwiseOperation,
OutDataTypeTuple, NumDim_m, // choose how to set dims
ElementwiseOperation, NumDim_n,
NumDim_m, // choose how to set dims NumDim_k,
NumDim_n, MPerThread,
NumDim_k, NPerThread,
MPerThread, KPerThread,
NPerThread, InScalarPerVectorSeq,
KPerThread, OutScalarPerVectorSeq>;
InScalarPerVectorSeq,
OutScalarPerVectorSeq>; static auto GetInstances()
{
static auto GetInstances() std::vector<std::unique_ptr<DeviceOp>> op_ptrs;
{ if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
std::vector<std::unique_ptr<DeviceOp>> op_ptrs; {
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>) add_device_transpose_f32_instances(op_ptrs);
{ }
add_device_transpose_f32_instances(op_ptrs); else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
} {
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>) add_device_transpose_f16_instances(op_ptrs);
{ }
add_device_transpose_f16_instances(op_ptrs); }
} return op_ptrs;
} }
return op_ptrs; };
}
}; } // namespace instance
} // namespace device
} // namespace instance } // namespace tensor_operation
} // namespace device } // namespace ck
} // namespace tensor_operation
} // namespace ck
# Hand the 3D transpose instance source to the project's add_instance_library
# helper under the target name device_transpose_instance.
add_instance_library(device_transpose_instance device_transpose_instances_3d.cpp)
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Forwards the FP16 transpose instance list to add_device_operation_instances
// together with the caller's `instances` vector.
//
// instances: vector handed to add_device_operation_instances; presumably one
//            entry is appended per tuple element (helper is not shown in this
//            file -- confirm).
//
// When CK_ENABLE_FP16 is not defined, the vector is only assigned to `ignore`
// and nothing else is done with it.
void add_device_transpose_f16_instances(
    std::vector<std::unique_ptr<DeviceElementwise3dImpl<F16, F16, NCDHW, 3>>>& instances)
{
#ifdef CK_ENABLE_FP16
    // device_transpose_f16_instances is declared as a plain (non-template)
    // std::tuple alias, so it is value-initialized without template arguments.
    add_device_operation_instances(instances, device_transpose_f16_instances{});
#else
    ignore = instances;
#endif
}
// Forwards the FP32 transpose instance list to add_device_operation_instances
// together with the caller's `instances` vector.
//
// instances: vector handed to add_device_operation_instances; presumably one
//            entry is appended per tuple element (helper is not shown in this
//            file -- confirm).
//
// When CK_ENABLE_FP32 is not defined, the vector is only assigned to `ignore`
// and nothing else is done with it.
void add_device_transpose_f32_instances(
    std::vector<std::unique_ptr<DeviceElementwise3dImpl<F32, F32, NCDHW, 3>>>& instances)
{
#ifdef CK_ENABLE_FP32
    // device_transpose_f32_instances is declared as a plain (non-template)
    // std::tuple alias, so it is value-initialized without template arguments.
    add_device_operation_instances(instances, device_transpose_f32_instances{});
#else
    ignore = instances;
#endif
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment