Commit 02684438 authored by carlushuang
Browse files

add BiasAddRelu

parent 4b448373
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
#define TEST_FUSION_BIAS_RELU_ADD 0 #define TEST_FUSION_BIAS_RELU_ADD 0
#define TEST_FUSION_BIAS_RELU 1 #define TEST_FUSION_BIAS_RELU 1
#define TEST_FUSION_BIAS 2 #define TEST_FUSION_BIAS 2
#define TEST_FUSION TEST_FUSION_BIAS #define TEST_FUSION_BIAS_ADD_RELU 3
#define TEST_FUSION TEST_FUSION_BIAS_ADD_RELU
#define TEST_LAYOUT_NHWC_KYXC_NHWK 0 #define TEST_LAYOUT_NHWC_KYXC_NHWK 0
#define TEST_LAYOUT_NHWC_KYXCK8_NHWK 1 #define TEST_LAYOUT_NHWC_KYXCK8_NHWK 1
...@@ -39,6 +40,7 @@ using PassThrough = ck::tensor_operation::cpu::element_wise::PassThrough; ...@@ -39,6 +40,7 @@ using PassThrough = ck::tensor_operation::cpu::element_wise::PassThrough;
using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd; using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu; using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu;
using Add = ck::tensor_operation::cpu::element_wise::Add; using Add = ck::tensor_operation::cpu::element_wise::Add;
using AddAddRelu = ck::tensor_operation::cpu::element_wise::AddAddRelu;
// ------------------ nhwc-kyxc-nhwk // ------------------ nhwc-kyxc-nhwk
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk( void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk(
...@@ -71,6 +73,18 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_local_c( ...@@ -71,6 +73,18 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_local_c(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_mt( void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>>& instances); std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>>& instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
// ------------------ nhwc-kcyxk8-nhwk // ------------------ nhwc-kcyxk8-nhwk
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk( void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddReluAdd>>& std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddReluAdd>>&
...@@ -102,6 +116,18 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_local_c( ...@@ -102,6 +116,18 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_local_c(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_mt( void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>>& instances); std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>>& instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
// ------------------ nhwc-yxck-nhwk // ------------------ nhwc-yxck-nhwk
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk( void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddReluAdd>>& std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddReluAdd>>&
...@@ -133,6 +159,18 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c( ...@@ -133,6 +159,18 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c(
void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_mt( void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>>& instances); std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>>& instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>>&
instances);
} // namespace device_conv2d_fwd_bias_activation_add_avx2_instance } // namespace device_conv2d_fwd_bias_activation_add_avx2_instance
} // namespace device } // namespace device
} // namespace cpu } // namespace cpu
...@@ -147,6 +185,8 @@ using OutElementOp = ck::tensor_operation::cpu::element_wise::AddReluAdd; ...@@ -147,6 +185,8 @@ using OutElementOp = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using OutElementOp = ck::tensor_operation::cpu::element_wise::AddRelu; using OutElementOp = ck::tensor_operation::cpu::element_wise::AddRelu;
#elif TEST_FUSION == TEST_FUSION_BIAS #elif TEST_FUSION == TEST_FUSION_BIAS
using OutElementOp = ck::tensor_operation::cpu::element_wise::Add; using OutElementOp = ck::tensor_operation::cpu::element_wise::Add;
#elif TEST_FUSION == TEST_FUSION_BIAS_ADD_RELU
using OutElementOp = ck::tensor_operation::cpu::element_wise::AddAddRelu;
#endif #endif
template <typename T> template <typename T>
...@@ -352,7 +392,7 @@ int main(int argc, char* argv[]) ...@@ -352,7 +392,7 @@ int main(int argc, char* argv[])
using WeiDataType = decltype(wei_type); using WeiDataType = decltype(wei_type);
using OutDataType = decltype(out_type); using OutDataType = decltype(out_type);
#if TEST_FUSION == TEST_FUSION_BIAS_RELU_ADD #if(TEST_FUSION == TEST_FUSION_BIAS_RELU_ADD) || (TEST_FUSION == TEST_FUSION_BIAS_ADD_RELU)
using ReferenceConvFwdInstance = using ReferenceConvFwdInstance =
ck::tensor_operation::host::ReferenceConvFwd_Bias_Activation_Add<InDataType, ck::tensor_operation::host::ReferenceConvFwd_Bias_Activation_Add<InDataType,
WeiDataType, WeiDataType,
...@@ -497,7 +537,7 @@ int main(int argc, char* argv[]) ...@@ -497,7 +537,7 @@ int main(int argc, char* argv[])
wei_k_c_y_x, wei_k_c_y_x,
out_n_k_ho_wo_host_result, out_n_k_ho_wo_host_result,
bias, bias,
#if TEST_FUSION == TEST_FUSION_BIAS_RELU_ADD #if(TEST_FUSION == TEST_FUSION_BIAS_RELU_ADD) || (TEST_FUSION == TEST_FUSION_BIAS_ADD_RELU)
residual, residual,
#endif #endif
conv_filter_strides, conv_filter_strides,
...@@ -514,6 +554,7 @@ int main(int argc, char* argv[]) ...@@ -514,6 +554,7 @@ int main(int argc, char* argv[])
using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd; using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu; using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu;
using Add = ck::tensor_operation::cpu::element_wise::Add; using Add = ck::tensor_operation::cpu::element_wise::Add;
using AddAddRelu = ck::tensor_operation::cpu::element_wise::AddAddRelu;
#if TEST_FUSION == TEST_FUSION_BIAS_RELU_ADD #if TEST_FUSION == TEST_FUSION_BIAS_RELU_ADD
using DeviceConvFwdNoOpPtr = ck::tensor_operation::cpu::device:: using DeviceConvFwdNoOpPtr = ck::tensor_operation::cpu::device::
...@@ -524,6 +565,9 @@ int main(int argc, char* argv[]) ...@@ -524,6 +565,9 @@ int main(int argc, char* argv[])
#elif TEST_FUSION == TEST_FUSION_BIAS #elif TEST_FUSION == TEST_FUSION_BIAS
using DeviceConvFwdNoOpPtr = ck::tensor_operation::cpu::device:: using DeviceConvFwdNoOpPtr = ck::tensor_operation::cpu::device::
DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>; DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, Add>;
#elif TEST_FUSION == TEST_FUSION_BIAS_ADD_RELU
using DeviceConvFwdNoOpPtr = ck::tensor_operation::cpu::device::
DeviceConvFwdBiasActivationAddPtr<PassThrough, PassThrough, AddAddRelu>;
#endif #endif
// add device Conv instances // add device Conv instances
...@@ -738,6 +782,76 @@ int main(int argc, char* argv[]) ...@@ -738,6 +782,76 @@ int main(int argc, char* argv[])
add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c(conv_ptrs); add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c(conv_ptrs);
} }
#endif #endif
#elif TEST_FUSION == TEST_FUSION_BIAS_ADD_RELU
#if TEST_LAYOUT == TEST_LAYOUT_NHWC_KYXC_NHWK
if(omp_get_max_threads() > 1)
{
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk_mt(conv_ptrs);
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk(conv_ptrs);
}
else
{
if(K % 8 == 0)
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk(conv_ptrs);
else
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk_local_c(
conv_ptrs);
}
#endif
#if TEST_LAYOUT == TEST_LAYOUT_NHWC_KYXCK8_NHWK
if(omp_get_max_threads() > 1)
{
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk_mt(conv_ptrs);
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk(conv_ptrs);
}
else
{
if(K % 8 == 0)
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk(conv_ptrs);
else
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk_local_c(
conv_ptrs);
}
#endif
#if TEST_LAYOUT == TEST_LAYOUT_NHWC_YXCK_NHWK
if(omp_get_max_threads() > 1)
{
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk_mt(conv_ptrs);
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk(conv_ptrs);
}
else
{
if(K % 8 == 0)
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk(conv_ptrs);
else
ck::tensor_operation::cpu::device::
device_conv2d_fwd_bias_activation_add_avx2_instance::
add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk_local_c(
conv_ptrs);
}
#endif
#endif #endif
} }
......
...@@ -172,6 +172,51 @@ struct AddReluAdd ...@@ -172,6 +172,51 @@ struct AddReluAdd
static constexpr char* Name() { return "AddReluAdd"; } static constexpr char* Name() { return "AddReluAdd"; }
}; };
// Ternary element-wise epilogue: y = relu(x0 + x1 + x2).
// Used to fuse conv output (x0) + bias (x1) + residual (x2) followed by
// relu into a single pass (the TEST_FUSION_BIAS_ADD_RELU path).
// Scalar, 4-wide (float4_t / SSE) and 8-wide (float8_t / AVX) overloads
// are provided. The in-place operator() forms forward to the matching
// Apply() overload so the arithmetic lives in exactly one place.
struct AddAddRelu
{
    // Scalar: y = max(x0 + x1 + x2, 0).
    void operator()(float& y, const float& x0, const float& x1, const float& x2) const
    {
        y = Apply(x0, x1, x2);
    }

    // 4-lane SSE form.
    void operator()(float4_t& y, const float4_t& x0, const float4_t& x1, const float4_t& x2) const
    {
        y = Apply(x0, x1, x2);
    }

    // 8-lane AVX form.
    void operator()(float8_t& y, const float8_t& x0, const float8_t& x1, const float8_t& x2) const
    {
        y = Apply(x0, x1, x2);
    }

    // Returns relu(x0 + x1 + x2).
    float Apply(const float& x0, const float& x1, const float& x2) const
    {
        float a = x0 + x1 + x2;
        return a > 0 ? a : 0;
    }

    // Returns relu(x0 + x1 + x2) per lane; _mm_max_ps with zero clamps
    // negative lanes (NaN lanes follow _mm_max_ps's second-operand rule).
    float4_t Apply(const float4_t& x0, const float4_t& x1, const float4_t& x2) const
    {
        float4_t a = _mm_add_ps(x0, x1);
        float4_t b = _mm_add_ps(a, x2);
        return _mm_max_ps(b, _mm_setzero_ps());
    }

    // 8-lane AVX variant of the above.
    float8_t Apply(const float8_t& x0, const float8_t& x1, const float8_t& x2) const
    {
        float8_t a = _mm256_add_ps(x0, x1);
        float8_t b = _mm256_add_ps(a, x2);
        return _mm256_max_ps(b, _mm256_setzero_ps());
    }

    // Fixed: a string literal is const char*; returning it as char* is
    // ill-formed in ISO C++ (hard error since C++11).
    static constexpr const char* Name() { return "AddAddRelu"; }
};
// Unary operators are usually called element-wisely before/after the reduction is executed on the // Unary operators are usually called element-wisely before/after the reduction is executed on the
// elements. They are needed for easy implementation of reduction types of AVG, NRM1, NRM2 // elements. They are needed for easy implementation of reduction types of AVG, NRM1, NRM2
......
...@@ -623,6 +623,10 @@ struct GridwiseGemmBiasActivationAddAvx2_MxN ...@@ -623,6 +623,10 @@ struct GridwiseGemmBiasActivationAddAvx2_MxN
c_threadwise_copy.SetSrc2SliceOrigin(c_block_desc, c_threadwise_copy.SetSrc2SliceOrigin(c_block_desc,
GetCIndex(i_mc, i_nc)); GetCIndex(i_mc, i_nc));
_mm_prefetch(reinterpret_cast<const float*>(c1_grid_buf.p_data_) +
c_threadwise_copy.src2_offset,
_MM_HINT_T1);
if constexpr(!UseCLocalBuffer) if constexpr(!UseCLocalBuffer)
{ {
c_threadwise_copy.SetSrcSliceOrigin(c_block_desc, c_threadwise_copy.SetSrcSliceOrigin(c_block_desc,
......
...@@ -2403,7 +2403,7 @@ struct ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_ ...@@ -2403,7 +2403,7 @@ struct ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_
// dst_slice_origin_step_idx need to be known at compile-time, for performance reason // dst_slice_origin_step_idx need to be known at compile-time, for performance reason
void MoveDstSliceWindow(const DstDesc&, const Index&) {} void MoveDstSliceWindow(const DstDesc&, const Index&) {}
private: // private:
const ElementwiseOperation element_op_; const ElementwiseOperation element_op_;
intptr_t i_dst_gemm_m; intptr_t i_dst_gemm_m;
......
...@@ -25,6 +25,7 @@ using PT = ck::tensor_operation::cpu::element_wise::PassThrough; ...@@ -25,6 +25,7 @@ using PT = ck::tensor_operation::cpu::element_wise::PassThrough;
using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd; using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu; using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu;
using Add = ck::tensor_operation::cpu::element_wise::Add; using Add = ck::tensor_operation::cpu::element_wise::Add;
using AddAddRelu = ck::tensor_operation::cpu::element_wise::AddAddRelu;
static constexpr auto ConvFwdDefault = static constexpr auto ConvFwdDefault =
ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default; ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default;
...@@ -299,6 +300,86 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_mt( ...@@ -299,6 +300,86 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_mt(
)); ));
} }
/****************************************************************************************************/
// Appends AVX2 conv2d-forward instances with the fused bias+add+relu
// (AddAddRelu) epilogue for the NHWC input / KYXC weight / NHWK output
// layout to `instances`. Each macro line is one tuning variant; the
// numeric arguments are presumably M/N/K blocking and micro-kernel tile
// sizes — TODO confirm against DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32.
// All variants here pass `false` for the first boolean, which by the
// sibling "_local_c" registrar appears to mean "no local C buffer" —
// verify against the macro definition.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 64, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, false, true, true, false)
            // clang-format on
            ));
}
// Same tuning variants as the non-"_local_c" kyxc registrar, but with the
// first boolean set to `true` — presumably selecting a thread-local C
// accumulation buffer (used by the caller when K % 8 != 0); TODO confirm
// against DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk_local_c(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 64, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, true, true, true, false)
            // clang-format on
            ));
}
// Multi-threaded ("_mt") variant list for the kyxc layout, registered by
// the caller when omp_get_max_threads() > 1. Mixes many small-tile
// variants (first two numbers small, e.g. 24x24) — presumably so work
// splits across threads — with the large tilings using the local-C flag
// set to `true`. Parameter meanings: TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxc_nhwk_mt(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 24, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 32, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 40, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 48, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 48, 48, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 56, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 72, 16, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 72, 16, 256, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 72, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 72, 32, 256, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 96, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 96, 64, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 120, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 120, 64, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXC_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, true, true, true, false)
            // clang-format on
            ));
}
} // namespace device_conv2d_fwd_bias_activation_add_avx2_instance } // namespace device_conv2d_fwd_bias_activation_add_avx2_instance
} // namespace device } // namespace device
} // namespace cpu } // namespace cpu
......
...@@ -25,6 +25,7 @@ using PT = ck::tensor_operation::cpu::element_wise::PassThrough; ...@@ -25,6 +25,7 @@ using PT = ck::tensor_operation::cpu::element_wise::PassThrough;
using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd; using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu; using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu;
using Add = ck::tensor_operation::cpu::element_wise::Add; using Add = ck::tensor_operation::cpu::element_wise::Add;
using AddAddRelu = ck::tensor_operation::cpu::element_wise::AddAddRelu;
static constexpr auto ConvFwdDefault = static constexpr auto ConvFwdDefault =
ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default; ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default;
...@@ -299,6 +300,86 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_mt( ...@@ -299,6 +300,86 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_mt(
)); ));
} }
/****************************************************************************************************/
// Appends AVX2 conv2d-forward instances with the fused bias+add+relu
// (AddAddRelu) epilogue for the NHWC / KYXCK8 (K-packed-by-8 weight) /
// NHWK layout. Same tuning list as the kyxc registrar; numeric arguments
// are presumably blocking/tiling sizes — TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32. First boolean `false`
// appears to mean "no local C buffer" (cf. the "_local_c" sibling).
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 64, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, false, true, true, false)
            // clang-format on
            ));
}
// Same tuning variants as the non-"_local_c" kyxck8 registrar, with the
// first boolean `true` — presumably enabling a local C accumulation
// buffer (caller selects this when K % 8 != 0); TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk_local_c(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 64, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, true, true, true, false)
            // clang-format on
            ));
}
// Multi-threaded ("_mt") variant list for the kyxck8 layout (caller uses
// it when omp_get_max_threads() > 1): many small tilings plus the large
// tilings with the local-C flag set. Parameter meanings presumably
// blocking/tiling sizes — TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_kyxck8_nhwk_mt(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 24, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 32, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 40, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 48, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 48, 48, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 56, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 72, 16, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 72, 16, 256, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 72, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 72, 32, 256, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 96, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 96, 64, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 120, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 120, 64, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_KYXCK8_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, true, true, true, false)
            // clang-format on
            ));
}
} // namespace device_conv2d_fwd_bias_activation_add_avx2_instance } // namespace device_conv2d_fwd_bias_activation_add_avx2_instance
} // namespace device } // namespace device
} // namespace cpu } // namespace cpu
......
...@@ -24,6 +24,7 @@ using PT = ck::tensor_operation::cpu::element_wise::PassThrough; ...@@ -24,6 +24,7 @@ using PT = ck::tensor_operation::cpu::element_wise::PassThrough;
using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd; using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu; using AddRelu = ck::tensor_operation::cpu::element_wise::AddRelu;
using Add = ck::tensor_operation::cpu::element_wise::Add; using Add = ck::tensor_operation::cpu::element_wise::Add;
using AddAddRelu = ck::tensor_operation::cpu::element_wise::AddAddRelu;
static constexpr auto ConvFwdDefault = static constexpr auto ConvFwdDefault =
ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default; ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default;
...@@ -298,6 +299,86 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_mt( ...@@ -298,6 +299,86 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_mt(
)); ));
} }
/****************************************************************************************************/
// Appends AVX2 conv2d-forward instances with the fused bias+add+relu
// (AddAddRelu) epilogue for the NHWC / YXCK weight / NHWK layout. Same
// tuning list as the kyxc/kyxck8 registrars; numeric arguments are
// presumably blocking/tiling sizes — TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32. First boolean `false`
// appears to mean "no local C buffer" (cf. the "_local_c" sibling).
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 64, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, false, true, true, false)
            // clang-format on
            ));
}
// Same tuning variants as the non-"_local_c" yxck registrar, with the
// first boolean `true` — presumably enabling a local C accumulation
// buffer (caller selects this when K % 8 != 0); TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk_local_c(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 64, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, true, true, true, false)
            // clang-format on
            ));
}
// Multi-threaded ("_mt") variant list for the yxck layout (caller uses
// it when omp_get_max_threads() > 1): many small tilings plus the large
// tilings with the local-C flag set. Parameter meanings presumably
// blocking/tiling sizes — TODO confirm against
// DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32.
void add_device_conv2d_fwd_bias_add_relu_avx2_nhwc_yxck_nhwk_mt(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddAddRelu>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 24, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 32, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 40, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 48, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 48, 48, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 56, 24, 256, 4, 24, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 72, 16, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 72, 16, 256, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 72, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 72, 32, 256, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 96, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 96, 64, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 120, 32, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 120, 64, 128, 6, 16, false, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 256, 128, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 128, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 512, 240, 128, 4, 24, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 512, 256, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 768, 320, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 896, 352, 128, 6, 16, true, true, true, false),
            DEVICE_CONV2D_FWD_BAA_AVX2_NHWC_YXCK_NHWK_F32(PT, PT, AddAddRelu, 1024, 416, 128, 6, 16, true, true, true, false)
            // clang-format on
            ));
}
} // namespace device_conv2d_fwd_bias_activation_add_avx2_instance } // namespace device_conv2d_fwd_bias_activation_add_avx2_instance
} // namespace device } // namespace device
} // namespace cpu } // namespace cpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment