Commit a9469403 authored by Shucai Xiao
Browse files

clang format

parent e3950e2c
...@@ -11,46 +11,44 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -11,46 +11,44 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
// Pack the input matrix `arg` into the blocked layout expected by the
// quantized (int8) gemm path, writing the result into `result`.
// Elements are regrouped into panels of nb=4 consecutive k-values:
//   out[(i_k % nb) + (i_m + (i_k / nb) * ldb) * nb] = in[i_m + i_k * ldb]
// NOTE(review): reconstructed from a scraped commit-diff page that had the
// pre- and post-clang-format text fused on every line; this is the
// post-format ("new") side of the diff, behavior unchanged.
//
// stream: HIP stream the packing kernel is launched on.
// result: destination buffer; its shape supplies the leading dimension.
// arg:    source matrix to be packed.
void pack_a(hipStream_t stream, const argument& result, const argument& arg)
{
    auto output_shape = result.get_shape();
    // Second-to-last dimension's stride is used as the leading dimension.
    auto dim_0      = output_shape.lens().size() - 2;
    std::size_t ldb = output_shape.strides()[dim_0];
    visit_all(result, arg)([&](auto output, auto input) {
        std::size_t nelements = output_shape.elements();
        auto* out_ptr         = device_cast(output.data());
        auto* in_ptr          = device_cast(input.data());
        visit_tensor_size(output_shape.lens().size(), [&](auto out_dim) {
            hip_tensor_descriptor<out_dim> desc(output_shape);
            gs_launch(stream, nelements)([=](auto ii) {
                // nb is the fixed packing block size required by the
                // int8 gemm kernels — presumably mandated by the rocBLAS
                // packed-int8 format; confirm against the rocBLAS docs.
                const size_t nb = 4;
                auto idx        = desc.multi(ii);
                std::size_t i_m = idx[0];
                std::size_t i_k = idx[1];
                out_ptr[i_k % nb + (i_m + (i_k / nb) * ldb) * nb] = in_ptr[i_m + i_k * ldb];
            });
        });
    });
}
void pack_b(hipStream_t stream, void pack_b(hipStream_t stream, const argument& result, const argument& arg)
const argument& result, const argument& arg)
{ {
auto output_shape = result.get_shape(); auto output_shape = result.get_shape();
auto dim_1 = output_shape.lens().size() - 1; auto dim_1 = output_shape.lens().size() - 1;
std::size_t lda = output_shape.strides()[dim_1]; std::size_t lda = output_shape.strides()[dim_1];
visit_all(result, arg) ([&](auto output, auto input) { visit_all(result, arg)([&](auto output, auto input) {
std::size_t nelements = output_shape.elements(); std::size_t nelements = output_shape.elements();
auto* out_ptr = device_cast(output.data()); auto* out_ptr = device_cast(output.data());
auto* in_ptr = device_cast(input.data()); auto* in_ptr = device_cast(input.data());
visit_tensor_size(output_shape.lens().size(), [&](auto out_dim) { visit_tensor_size(output_shape.lens().size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(output_shape); hip_tensor_descriptor<out_dim> desc(output_shape);
gs_launch(stream, nelements)([=](auto ii) { gs_launch(stream, nelements)([=](auto ii) {
const size_t nb = 4; const size_t nb = 4;
auto idx = desc.multi(ii); auto idx = desc.multi(ii);
std::size_t i_n = idx[0]; std::size_t i_n = idx[0];
std::size_t i_k = idx[1]; std::size_t i_k = idx[1];
out_ptr[i_k % nb + (i_n + (i_k / nb) * lda) * nb] = in_ptr[i_n + i_k * lda]; out_ptr[i_k % nb + (i_n + (i_k / nb) * lda) * nb] = in_ptr[i_n + i_k * lda];
}); });
}); });
......
...@@ -10,11 +10,9 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -10,11 +10,9 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace device { namespace device {
void pack_a(hipStream_t stream, void pack_a(hipStream_t stream, const argument& result, const argument& arg);
const argument& result, const argument& arg);
void pack_b(hipStream_t stream, void pack_b(hipStream_t stream, const argument& result, const argument& arg);
const argument& result, const argument& arg);
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -17,7 +17,10 @@ struct miopen_quant_gemm ...@@ -17,7 +17,10 @@ struct miopen_quant_gemm
shape compute_shape(const std::vector<shape>& inputs) const; shape compute_shape(const std::vector<shape>& inputs) const;
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
}; };
struct hip_pack struct hip_pack
...@@ -26,7 +29,10 @@ struct hip_pack ...@@ -26,7 +29,10 @@ struct hip_pack
shape compute_shape(const std::vector<shape>& inputs) const; shape compute_shape(const std::vector<shape>& inputs) const;
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; } std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
}; };
} // namespace gpu } // namespace gpu
......
...@@ -272,9 +272,9 @@ struct miopen_apply ...@@ -272,9 +272,9 @@ struct miopen_apply
std::vector<instruction_ref> refs = ins->inputs(); std::vector<instruction_ref> refs = ins->inputs();
refs.push_back(output); refs.push_back(output);
// Need another two buffers for packed data buffer // Need another two buffers for packed data buffer
auto shape_a = refs.at(0)->get_shape(); auto shape_a = refs.at(0)->get_shape();
if (shape_a.transposed()) if(shape_a.transposed())
{ {
auto pack_a = insert_allocation(ins, shape_a); auto pack_a = insert_allocation(ins, shape_a);
refs.push_back(pack_a); refs.push_back(pack_a);
...@@ -282,7 +282,7 @@ struct miopen_apply ...@@ -282,7 +282,7 @@ struct miopen_apply
} }
auto shape_b = refs.at(1)->get_shape(); auto shape_b = refs.at(1)->get_shape();
if (!shape_b.transposed()) if(!shape_b.transposed())
{ {
auto pack_b = insert_allocation(ins, shape_b); auto pack_b = insert_allocation(ins, shape_b);
refs.push_back(pack_b); refs.push_back(pack_b);
......
...@@ -82,7 +82,7 @@ argument miopen_quant_gemm::compute(context& ctx, ...@@ -82,7 +82,7 @@ argument miopen_quant_gemm::compute(context& ctx,
device::pack_a(ctx.get_stream().get(), args[1], arg_b); device::pack_a(ctx.get_stream().get(), args[1], arg_b);
} }
// need to pack A in this scenario, use the algorithm to pack B in the // need to pack A in this scenario, use the algorithm to pack B in the
// comment of the API // comment of the API
if(transa) if(transa)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment