"...composable_kernel.git" did not exist on "63eee2d9991b08ca286f6895dd8f90da12a62da3"
Commit 28bfc452 authored by Chao Liu's avatar Chao Liu
Browse files

refactor

parent 40b2fad3
...@@ -12,28 +12,70 @@ ...@@ -12,28 +12,70 @@
#include "host_gemm.hpp" #include "host_gemm.hpp"
#include "device_tensor.hpp" #include "device_tensor.hpp"
#include "device_base.hpp" #include "device_base.hpp"
#include "example/2_gemm_xdl_bias_add/include/device_gemm_xdl_two_extra_source_reduce.hpp" #include "example/2_gemm_xdl_bias_relu_add/include/device_gemm_xdl_two_extra_source_reduce.hpp"
// C[m, n] = Relu(alpha * (A[m, k] * B[k, n] + C0[m])) + C1[m, n], with alpha = 0.1
// assume C0 is contiguous in memory
// C0 resides in memory as 1d vector [m], but is represented as 2D matrix [m, n], with stride =
// 0 in the "n" dimension
// assume C1 and C have same layout as C
// v0 is from A * B
// v1 is from C0 (bias)
// v2 is from C1 (residual)
struct BiasReluAdd
{
    // Host reference path: straightforward relu(alpha * (v0 + v1)) + v2.
    template <typename T1, typename T2>
    __host__ constexpr float operator()(float v0, T1 v1, T2 v2) const
    {
        float a = v0 + v1;
        float b = 0.1f * a; // float literal: avoid silent promotion to double
        float c = b > 0.0f ? b : 0.0f;
        float d = c + v2;

        return d;
    }

    // Device path: algebraically identical to the host path, but written as
    // alpha * (v2 / alpha + relu(v0 + v1)) — presumably restructured so the
    // final scale-and-add maps onto a fused multiply-add; TODO confirm intent.
    template <typename T1, typename T2>
    __device__ constexpr float operator()(float v0, T1 v1, T2 v2) const
    {
        constexpr float alpha     = 0.1f; // was 0.1 (double); keep math in float
        constexpr float alpha_inv = 1.0f / alpha;

        float a = v2 * alpha_inv;
        float b = v1 + v0;
        float c = max(b, float(0));
        float d = alpha * (a + c);

        return d;
    }
};
struct BiasRelu
{ {
template <typename T> template <typename T1, typename T2>
__host__ __device__ constexpr T operator()(T v) const __host__ constexpr float operator()(float v0, T1 v1, T2) const
{ {
return v; float a = v0 + v1;
float b = 0.1 * a;
float c = b > 0 ? b : 0;
return c;
}
template <typename T1, typename T2>
__device__ constexpr float operator()(float v0, T1 v1, T2) const
{
constexpr float alpha = 0.1;
float b = v1 + v0;
float c = max(b, float(0));
float d = alpha * c;
return d;
} }
}; };
// GEMM Bias Add:
// C[m, n] = alpha(A[m, k] * B[k, n]) + beta * C0[m, n] + gamma * C1[m]
// assume C0 has same layout as C
// assume C1 is contiguous in memory
// C1 presents in memory as 1d vector, but is represented as 2D matrix C1[m, n], with stride = 0 in
// the "n" dimension
//
// alpha * v0 + beta * v1 + gamma * v2
// v0 is from C matrix
// v1 is from residual matrix
// v2 is from bias vector
struct BiasAdd struct BiasAdd
{ {
#if 1 #if 1
...@@ -88,90 +130,40 @@ struct BiasAdd ...@@ -88,90 +130,40 @@ struct BiasAdd
#endif #endif
}; };
template <typename ADataType, struct PassThrough
typename BDataType,
typename CDataType,
typename ALayout,
typename BLayout,
typename CLayout,
typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation>
struct DeviceGemmInstance;
template <typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation>
struct DeviceGemmInstance<ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::gemm::RowMajor,
ck::tensor_layout::gemm::ColumnMajor,
ck::tensor_layout::gemm::RowMajor,
AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation>
{ {
using F16 = ck::half_t; template <typename T>
using F32 = float; __host__ __device__ constexpr T operator()(T v) const
{
using Row = ck::tensor_layout::gemm::RowMajor; return v;
using Col = ck::tensor_layout::gemm::ColumnMajor; }
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using AOp = AElementwiseOperation;
using BOp = BElementwiseOperation;
using COp = CElementwiseOperation;
// Compilation parameters for NT problem
// clang-format off
using type =
//#################################################################| AData| BData| CData| AccData| ALayout| BLayout| CLayout| AElementwise| BElementwise| CElementwise| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds|
//#################################################################| Type| Type| Type| Type| | | | Operation| Operation| Operation| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN|
//#################################################################| | | | | | | | | | | | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | |
//#################################################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
ck::tensor_operation::device::DeviceGemmXdl_two_extra_source_reduce< F16, F16, F16, F32, Row, Col, Row, AOp, BOp, COp, 256, 256, 128, 4, 8, 32, 32, 4, 2, S<1, 4, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, S<1, 2, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 7, 1, true, true>;
// clang-format on
}; };
template <typename AElementwiseOperation, template <ck::index_t... Is>
typename BElementwiseOperation, using S = ck::Sequence<Is...>;
typename CElementwiseOperation>
struct DeviceGemmInstance<float, using ADataType = ck::half_t;
float, using BDataType = ck::half_t;
float, using CDataType = ck::half_t;
ck::tensor_layout::gemm::RowMajor, using AccDataType = float;
ck::tensor_layout::gemm::ColumnMajor,
ck::tensor_layout::gemm::RowMajor, using ALayout = ck::tensor_layout::gemm::RowMajor;
AElementwiseOperation, using BLayout = ck::tensor_layout::gemm::ColumnMajor;
BElementwiseOperation, using CLayout = ck::tensor_layout::gemm::RowMajor;
CElementwiseOperation>
{ using AOp = PassThrough;
using F16 = ck::half_t; using BOp = PassThrough;
using F32 = float; using COp = BiasReluAdd;
using Row = ck::tensor_layout::gemm::RowMajor; // Compilation parameters for NT problem
using Col = ck::tensor_layout::gemm::ColumnMajor; // clang-format off
using DeviceGemmInstance =
template <ck::index_t... Is> //#################################################################| AData| BData| CData| AccData| ALayout| BLayout| CLayout| AElementwise| BElementwise| CElementwise| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds|
using S = ck::Sequence<Is...>; //#################################################################| Type| Type| Type| Type| | | | Operation| Operation| Operation| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN|
//#################################################################| | | | | | | | | | | | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | |
using AOp = AElementwiseOperation; //#################################################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
using BOp = BElementwiseOperation; ck::tensor_operation::device::DeviceGemmXdl_two_extra_source_reduce< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AOp, BOp, COp, 256, 256, 128, 4, 8, 32, 32, 4, 2, S<1, 4, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, S<1, 2, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 7, 1, true, true>;
using COp = CElementwiseOperation; // clang-format on
// Compilation parameters for NT problem
// clang-format off
using type =
//#################################################################| AData| BData| CData| AccData| ALayout| BLayout| CLayout| AElementwise| BElementwise| CElementwise| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds|
//#################################################################| Type| Type| Type| Type| | | | Operation| Operation| Operation| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN|
//#################################################################| | | | | | | | | | | | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | |
//#################################################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
ck::tensor_operation::device::DeviceGemmXdl_two_extra_source_reduce< F32, F32, F32, F32, Row, Col, Row, AOp, BOp, COp, 256, 256, 128, 4, 4, 32, 32, 4, 2, S<1, 4, 4>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 4, 4, S<1, 2, 4>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 4, 4, 7, 1, true, true>;
// clang-format on
};
template <typename AType, template <typename AType,
typename BType, typename BType,
...@@ -210,7 +202,40 @@ static void host_verify(const Tensor<AType>& a_m_k, ...@@ -210,7 +202,40 @@ static void host_verify(const Tensor<AType>& a_m_k,
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
bool do_verification = false;
int init_method      = 0;
int nrepeat          = 5;

// GEMM shape (defaults used when not supplied on the command line)
ck::index_t M = 3840;
ck::index_t N = 4096;
ck::index_t K = 4096;

ck::index_t StrideA = 4096;
ck::index_t StrideB = 4096;
ck::index_t StrideC = 4096;

if(argc == 4)
{
    // Only the control flags are given. NOTE: argv[4..6] do not exist when
    // argc == 4, so reading M/N/K here (as the previous code did) was an
    // out-of-bounds access; the three arguments are verification/init/nrepeat.
    do_verification = std::stoi(argv[1]);
    init_method     = std::stoi(argv[2]);
    nrepeat         = std::stoi(argv[3]);
}
else if(argc == 10)
{
    // Full specification: flags, GEMM extents, and strides.
    do_verification = std::stoi(argv[1]);
    init_method     = std::stoi(argv[2]);
    nrepeat         = std::stoi(argv[3]);

    M = std::stoi(argv[4]);
    N = std::stoi(argv[5]);
    K = std::stoi(argv[6]);

    StrideA = std::stoi(argv[7]);
    StrideB = std::stoi(argv[8]);
    StrideC = std::stoi(argv[9]);
}
else
{ {
printf("arg1: verification (0=no, 1=yes)\n"); printf("arg1: verification (0=no, 1=yes)\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"); printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
...@@ -219,35 +244,6 @@ int main(int argc, char* argv[]) ...@@ -219,35 +244,6 @@ int main(int argc, char* argv[])
exit(0); exit(0);
} }
const bool do_verification = std::stoi(argv[1]);
const int init_method = std::stoi(argv[2]);
const int nrepeat = std::stoi(argv[3]);
// GEMM shape
ck::index_t M = std::stoi(argv[4]);
ck::index_t N = std::stoi(argv[5]);
ck::index_t K = std::stoi(argv[6]);
ck::index_t StrideA = std::stoi(argv[7]);
ck::index_t StrideB = std::stoi(argv[8]);
ck::index_t StrideC = std::stoi(argv[9]);
// matrix data type
#if 1
using ADataType = ck::half_t;
using BDataType = ck::half_t;
using CDataType = ck::half_t;
#else
using ADataType = float;
using BDataType = float;
using CDataType = float;
#endif
// matrix layout
using ALayout = ck::tensor_layout::gemm::RowMajor;
using BLayout = ck::tensor_layout::gemm::ColumnMajor;
using CLayout = ck::tensor_layout::gemm::RowMajor;
auto f_host_tensor_descriptor = auto f_host_tensor_descriptor =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) { [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value) if(std::is_same<decltype(layout), ck::tensor_layout::gemm::RowMajor>::value)
...@@ -267,14 +263,14 @@ int main(int argc, char* argv[]) ...@@ -267,14 +263,14 @@ int main(int argc, char* argv[])
Tensor<BDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{})); Tensor<BDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
Tensor<BDataType> c_m_n_device_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{})); Tensor<BDataType> c_m_n_device_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
// C0[m ,n] // C0[m]
Tensor<BDataType> c0_m_n(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
// C1[m]
Tensor<CDataType> c1_m_n(HostTensorDescriptor( Tensor<CDataType> c1_m_n(HostTensorDescriptor(
std::vector<std::size_t>({static_cast<std::size_t>(M), static_cast<std::size_t>(N)}), std::vector<std::size_t>({static_cast<std::size_t>(M), static_cast<std::size_t>(N)}),
std::vector<std::size_t>({1, 0}))); std::vector<std::size_t>({1, 0})));
// C1[m ,n]
Tensor<BDataType> c0_m_n(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
std::cout << "a_m_k: " << a_m_k.mDesc << std::endl; std::cout << "a_m_k: " << a_m_k.mDesc << std::endl;
std::cout << "b_k_n: " << b_k_n.mDesc << std::endl; std::cout << "b_k_n: " << b_k_n.mDesc << std::endl;
std::cout << "c_m_n: " << c_m_n_host_result.mDesc << std::endl; std::cout << "c_m_n: " << c_m_n_host_result.mDesc << std::endl;
...@@ -309,18 +305,10 @@ int main(int argc, char* argv[]) ...@@ -309,18 +305,10 @@ int main(int argc, char* argv[])
c0_m_n_device_buf.ToDevice(c0_m_n.mData.data()); c0_m_n_device_buf.ToDevice(c0_m_n.mData.data());
c1_m_n_device_buf.ToDevice(c1_m_n.mData.data()); c1_m_n_device_buf.ToDevice(c1_m_n.mData.data());
auto c_element_op = BiasAdd{}; auto c_element_op = BiasReluAdd{};
// do GEMM // do GEMM
auto gemm = typename DeviceGemmInstance<ADataType, auto gemm = DeviceGemmInstance{};
BDataType,
CDataType,
ALayout,
BLayout,
CLayout,
PassThrough,
PassThrough,
decltype(c_element_op)>::type{};
auto invoker = gemm.MakeInvoker(); auto invoker = gemm.MakeInvoker();
auto argument = gemm.MakeArgument(static_cast<ADataType*>(a_m_k_device_buf.GetDeviceBuffer()), auto argument = gemm.MakeArgument(static_cast<ADataType*>(a_m_k_device_buf.GetDeviceBuffer()),
......
...@@ -97,6 +97,31 @@ struct BiasReluAdd ...@@ -97,6 +97,31 @@ struct BiasReluAdd
} }
}; };
// C[m, n] = Relu(alpha * (A[m, k] * B[k, n] + C0[m])), with alpha = 0.1.
// The third argument is accepted but unused so the functor stays
// signature-compatible with the three-source element ops (e.g. BiasReluAdd).
struct BiasRelu
{
    // Host reference path: relu(alpha * (v0 + v1)).
    template <typename T1, typename T2>
    __host__ constexpr float operator()(float v0, T1 v1, T2) const
    {
        float a = v0 + v1;
        float b = 0.1f * a; // float literal: avoid silent promotion to double
        float c = b > 0.0f ? b : 0.0f;

        return c;
    }

    // Device path: alpha * relu(v0 + v1) — same result as the host path
    // since alpha > 0 (scaling by a positive constant commutes with relu).
    template <typename T1, typename T2>
    __device__ constexpr float operator()(float v0, T1 v1, T2) const
    {
        constexpr float alpha = 0.1f; // was 0.1 (double); keep math in float

        float b = v1 + v0;
        float c = max(b, float(0));
        float d = alpha * c;

        return d;
    }
};
using InDataType = ck::half_t; using InDataType = ck::half_t;
using WeiDataType = ck::half_t; using WeiDataType = ck::half_t;
using OutDataType = ck::half_t; using OutDataType = ck::half_t;
...@@ -113,13 +138,13 @@ using InElementOp = PassThrough; ...@@ -113,13 +138,13 @@ using InElementOp = PassThrough;
using WeiElementOp = PassThrough; using WeiElementOp = PassThrough;
using OutElementOp = BiasReluAdd; using OutElementOp = BiasReluAdd;
// clang-format off
using DeviceConvFwdInstance = using DeviceConvFwdInstance =
// clang-format off //################################################################| NDim| InData| WeiData| OutData| AccData| In| Wei| Out| In| Wei| Out| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds|
//################################################################| NDim| InData| WeiData| OutData| AccData| In| Wei| Out| In| Wei| Out| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds| //################################################################| Spatial| Type| Type| Type| Type| Layout| Layout| Layout| Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN|
//################################################################| Spatial| Type| Type| Type| Type| Layout| Layout| Layout| Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN| //################################################################| | | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | |
//################################################################| | | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | | //################################################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
//################################################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ck::tensor_operation::device::DeviceConvFwdXdl_bias_activation_add< 2, InDataType, WeiDataType, OutDataType, AccDataType, InLayout, WeiLayout, OutLayout, InElementOp, WeiElementOp, OutElementOp, 256, 128, 256, 4, 8, 32, 32, 2, 4, S<1, 2, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, S<1, 4, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 7, 1, true, true>;
ck::tensor_operation::device::DeviceConvFwdXdl_bias_activation_add< 2, InDataType, WeiDataType, OutDataType, AccDataType, InLayout, WeiLayout, OutLayout, InElementOp, WeiElementOp, OutElementOp, 256, 128, 256, 4, 8, 32, 32, 2, 4, S<1, 2, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, S<1, 4, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 7, 1, true, true>;
// clang-format on // clang-format on
template <typename TIn, template <typename TIn,
......
...@@ -12,16 +12,16 @@ include_directories(BEFORE ...@@ -12,16 +12,16 @@ include_directories(BEFORE
) )
set(GEMM_XDL_SOURCE 1_gemm_xdl/gemm_xdl.cpp) set(GEMM_XDL_SOURCE 1_gemm_xdl/gemm_xdl.cpp)
set(GEMM_XDL_BIAS_ADD_SOURCE 2_gemm_xdl_bias_add/gemm_xdl_bias_add.cpp) set(GEMM_XDL_BIAS_RELU_ADD_SOURCE 2_gemm_xdl_bias_relu_add/gemm_xdl_bias_relu_add.cpp)
set(CONV_XDL_SOURCE 3_conv_xdl/conv_xdl.cpp) set(CONV_XDL_SOURCE 3_conv_xdl/conv_xdl.cpp)
set(CONV_XDL_BIAS_RELU_ADD_SOURCE 4_conv_xdl_bias_relu_add/conv_xdl_bias_relu_add.cpp) set(CONV_XDL_BIAS_RELU_ADD_SOURCE 4_conv_xdl_bias_relu_add/conv_xdl_bias_relu_add.cpp)
add_executable(gemm_xdl ${GEMM_XDL_SOURCE}) add_executable(gemm_xdl ${GEMM_XDL_SOURCE})
add_executable(gemm_xdl_bias_add ${GEMM_XDL_BIAS_ADD_SOURCE}) add_executable(gemm_xdl_bias_relu_add ${GEMM_XDL_BIAS_RELU_ADD_SOURCE})
add_executable(conv_xdl ${CONV_XDL_SOURCE}) add_executable(conv_xdl ${CONV_XDL_SOURCE})
add_executable(conv_xdl_bias_relu_add ${CONV_XDL_BIAS_RELU_ADD_SOURCE}) add_executable(conv_xdl_bias_relu_add ${CONV_XDL_BIAS_RELU_ADD_SOURCE})
target_link_libraries(gemm_xdl PRIVATE host_tensor) target_link_libraries(gemm_xdl PRIVATE host_tensor)
target_link_libraries(gemm_xdl_bias_add PRIVATE host_tensor) target_link_libraries(gemm_xdl_bias_relu_add PRIVATE host_tensor)
target_link_libraries(conv_xdl PRIVATE host_tensor) target_link_libraries(conv_xdl PRIVATE host_tensor)
target_link_libraries(conv_xdl_bias_relu_add PRIVATE host_tensor) target_link_libraries(conv_xdl_bias_relu_add PRIVATE host_tensor)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment