Commit c6e072a6 authored by Jing Zhang
Browse files

finished vec output

parent c1159e3c
......@@ -31,7 +31,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
{
template <typename... Wei,
typename... In,
typename... Add,
typename... Out,
typename ConvStrides,
typename ConvDilations,
......@@ -80,9 +79,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
const auto OutRightPadH = Hop - Ho;
const auto OutRightPadW = Wop - Wo;
const auto AddRightPadH = 2 * OutRightPadH;
const auto AddRightPadW = 2 * OutRightPadW;
const auto InLeftPadH = in_left_pads[I0];
const auto InLeftPadW = in_left_pads[I1];
......@@ -93,8 +89,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
<< std::endl;
std::cerr << "InRightPadH = " << InRightPadH << " InRightPadW = " << InRightPadW
<< std::endl;
std::cerr << "AddRightPadH = " << AddRightPadH << " AddRightPadW = " << AddRightPadW
<< std::endl;
// weight tensor
const auto wei_e_k_global_desc = transform_dynamic_tensor_descriptor(
......
......@@ -386,11 +386,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
static_for<0, CThreadTransferDstScalarPerVector, 1>{}([&](auto i) {
t.template AsType<int8_t>()(i) =
p_c_thread[c_k_n_ho_wo_thread_desc_vec.CalculateOffset(
make_tuple(k_i * CThreadTransferDstScalarPerVector + i,
0,
h_i / 2,
w_i / 2))];
p_c_thread[c_k_n_ho_wo_thread_desc_vec.CalculateOffset(make_tuple(
k_i * CThreadTransferDstScalarPerVector + i, 0, h_i, w_i))];
});
// d_vec.template AsType<FloatC>()(
......
......@@ -625,12 +625,9 @@ int main(int argc, char* argv[])
constexpr auto Ho = out_nkhw_desc.GetLength(Number<2>{});
constexpr auto Wo = out_nkhw_desc.GetLength(Number<3>{});
// auto add_nkhw_desc = make_native_tensor_descriptor_packed(Sequence<N, K, Ho * 2, Wo * 2>{});
ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
// ostream_tensor_descriptor(add_nkhw_desc, std::cout << "add_nkhw_desc: ");
print_array("LeftPads", to_multi_index(LeftPads{}));
print_array("RightPads", to_multi_index(RightPads{}));
......@@ -661,7 +658,6 @@ int main(int argc, char* argv[])
Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment