Commit 5fa209af authored by rocking
Browse files

Add padding

parent 0f421d6f
...@@ -468,7 +468,6 @@ int main(int argc, char* argv[]) ...@@ -468,7 +468,6 @@ int main(int argc, char* argv[])
auto broadcastDiv_invoker_ptr = broadcastDiv.MakeInvokerPointer(); auto broadcastDiv_invoker_ptr = broadcastDiv.MakeInvokerPointer();
broadcastDiv_invoker_ptr->Run(broadcastDiv_argument_ptr.get(), nrepeat); broadcastDiv_invoker_ptr->Run(broadcastDiv_argument_ptr.get(), nrepeat);
// TODO = do_verification
if(do_verification) if(do_verification)
{ {
std::cout << "verification..." << std::endl; std::cout << "verification..." << std::endl;
......
...@@ -34,10 +34,21 @@ struct DeviceElementwise_2D : public DeviceElementwise<ElementwiseFunctor> ...@@ -34,10 +34,21 @@ struct DeviceElementwise_2D : public DeviceElementwise<ElementwiseFunctor>
make_naive_tensor_descriptor(make_tuple(m, n), make_tuple(stride[0], stride[1])); make_naive_tensor_descriptor(make_tuple(m, n), make_tuple(stride[0], stride[1]));
// 1d desc - [m * n] // 1d desc - [m * n]
return transform_tensor_descriptor(desc_m_n, const auto desc_m0 =
make_tuple(make_merge_transform(make_tuple(m, n))), transform_tensor_descriptor(desc_m_n,
make_tuple(Sequence<0, 1>{}), make_tuple(make_merge_transform(make_tuple(m, n))),
make_tuple(Sequence<0>{})); make_tuple(Sequence<0, 1>{}),
make_tuple(Sequence<0>{}));
// pad
const auto m0 = desc_m0.GetLength(I0);
const auto pad = math::integer_least_multiple(m0, ScalarPerVector) - m0;
const auto desc_m0_pad =
transform_tensor_descriptor(desc_m0,
make_tuple(make_right_pad_transform(m0, pad)),
make_tuple(Sequence<0>{}),
make_tuple(Sequence<0>{}));
return desc_m0_pad;
} }
using GridDesc_M0 = decltype(MakeDescriptor_M0({1, 1}, {1, 1})); using GridDesc_M0 = decltype(MakeDescriptor_M0({1, 1}, {1, 1}));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment