Commit 28ab5f76 authored by Shucai Xiao

fix cppcheck error.

parent cade36d1
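As the diffs below show, the cppcheck warning evidently came from miopen_quant_gemm::compute_shape, which lazily allocated its packing workspaces by const_cast-ing `this` inside a const member function. The fix turns arg_a and arg_b into ordinary members that a new add_quant_dot_op lowering step allocates up front, and has the device pack kernels build their tensor descriptors from the input argument's shape (renamed comp_shape) rather than the result's.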
@@ -13,18 +13,18 @@ namespace device {
 void pack_a(hipStream_t stream, const argument& result, const argument& arg)
 {
-    auto output_shape = result.get_shape();
-    auto out_lens     = output_shape.lens();
+    auto comp_shape = arg.get_shape();
+    auto out_lens   = comp_shape.lens();
     auto dim_0      = out_lens.size() - 2;
     auto dim_1      = out_lens.size() - 1;
-    std::size_t lda    = output_shape.strides()[dim_0];
+    std::size_t lda    = comp_shape.strides()[dim_0];
     std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
     visit_all(result, arg)([&](auto output, auto input) {
-        std::size_t nelements = output_shape.elements();
+        std::size_t nelements = comp_shape.elements();
         auto* out_ptr         = device_cast(output.data());
         auto* in_ptr          = device_cast(input.data());
         visit_tensor_size(out_lens.size(), [&](auto out_dim) {
-            hip_tensor_descriptor<out_dim> desc(output_shape);
+            hip_tensor_descriptor<out_dim> desc(comp_shape);
             gs_launch(stream, nelements)([=](auto ii) {
                 const size_t nb = 4;
                 auto idx        = desc.multi(ii);
@@ -40,7 +40,7 @@ void pack_a(hipStream_t stream, const argument& result, const argument& arg)
 void pack_b(hipStream_t stream, const argument& result, const argument& arg)
 {
-    auto trans_shape = result.get_shape();
+    auto trans_shape = arg.get_shape();
     auto out_lens    = trans_shape.lens();
     auto dim_0       = trans_shape.lens().size() - 2;
     auto dim_1       = trans_shape.lens().size() - 1;
@@ -48,14 +48,14 @@ void pack_b(hipStream_t stream, const argument& result, const argument& arg)
     auto wrap_lens = out_lens;
     std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
-    shape output_shape{trans_shape.type(), wrap_lens};
+    shape comp_shape{trans_shape.type(), wrap_lens};
     std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
     visit_all(result, arg)([&](auto output, auto input) {
-        std::size_t nelements = output_shape.elements();
+        std::size_t nelements = comp_shape.elements();
         auto* out_ptr         = device_cast(output.data());
         auto* in_ptr          = device_cast(input.data());
         visit_tensor_size(out_lens.size(), [&](auto out_dim) {
-            hip_tensor_descriptor<out_dim> desc(output_shape);
+            hip_tensor_descriptor<out_dim> desc(comp_shape);
             gs_launch(stream, nelements)([=](auto ii) {
                 const size_t nb = 4;
                 auto idx        = desc.multi(ii);
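In both kernels, desc.multi(ii) converts the flat launch index back into tensor coordinates for the shape the descriptor was built from, which is why switching that shape from result.get_shape() to arg.get_shape() (and the output_shape to comp_shape rename) matters: coordinates are now computed against the tensor being packed. A rough standalone sketch of that index mapping, assuming a dense row-major layout (the real hip_tensor_descriptor also tracks strides):

    #include <array>
    #include <cstddef>

    // Rough stand-in for hip_tensor_descriptor::multi: recover N-dimensional
    // coordinates from a flat element index, assuming dense row-major layout.
    template <std::size_t N>
    std::array<std::size_t, N> multi(const std::array<std::size_t, N>& lens, std::size_t ii)
    {
        std::array<std::size_t, N> idx{};
        for(std::size_t d = N; d-- > 0;) // innermost dimension varies fastest
        {
            idx[d] = ii % lens[d];
            ii /= lens[d];
        }
        return idx;
    }

    int main()
    {
        // 2x3 tensor: flat index 4 maps to coordinates (1, 1)
        auto idx = multi<2>({2, 3}, 4);
        return (idx[0] == 1 && idx[1] == 1) ? 0 : 1;
    }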
@@ -13,8 +13,8 @@ struct context;
 struct miopen_quant_gemm
 {
     op::quant_dot op;
-    miopen_quant_gemm(op::quant_dot qop) : op(qop) {}
+    argument arg_a;
+    argument arg_b;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -31,9 +31,6 @@ struct miopen_quant_gemm
         return shapes.size() - 1;
     }

-    private:
-    argument arg_a;
-    argument arg_b;
 };
 } // namespace gpu
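A side effect worth noting: with the user-declared constructor and the private section gone, miopen_quant_gemm is an aggregate of public members, which is what lets the lowering code below write miopen_quant_gemm{op, arg_a, arg_b}. A minimal sketch with stand-in types:

    #include <string>

    // Stand-ins for op::quant_dot and argument; only the struct's shape matters.
    struct gemm_like
    {
        std::string op; // stands in for op::quant_dot
        int arg_a;      // stands in for argument
        int arg_b;
    };

    int main()
    {
        // Aggregate initialization: valid only because gemm_like has no
        // user-declared constructor and no private members.
        gemm_like g{"quant_dot", 1, 2};
        return (g.arg_a + g.arg_b == 3) ? 0 : 1;
    }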
@@ -99,7 +99,6 @@ struct miopen_apply
         add_generic_op<hip_min>("min");
         add_extend_op<miopen_gemm, op::dot>("dot");
-        add_extend_op<miopen_quant_gemm, op::quant_dot>("quant_dot");
         add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
         add_extend_op<hip_concat, op::concat>("concat");
         add_extend_op<miopen_softmax, op::softmax>("softmax");
@@ -112,6 +111,7 @@ struct miopen_apply
         add_lrn_op();
         add_convolution_op();
         add_quant_convolution_op();
+        add_quant_dot_op();
         add_pooling_op();
         add_batch_norm_inference_op();
     }
@@ -174,6 +174,23 @@ struct miopen_apply
         });
     }

+    void add_quant_dot_op()
+    {
+        apply_map.emplace("quant_dot", [=](instruction_ref ins) {
+            auto&& op      = any_cast<op::quant_dot>(ins->get_operator());
+            auto inputs    = ins->inputs();
+            auto in_shapes = to_shapes(inputs);
+            auto arg_a     = allocate_gpu(in_shapes[0]);
+            auto arg_b     = allocate_gpu(in_shapes[1]);
+            auto quant_dot = miopen_quant_gemm{op, arg_a, arg_b};
+            auto output    = insert_allocation(ins, ins->get_shape());
+            inputs.push_back(output);
+            return prog->replace_instruction(ins, quant_dot, inputs);
+        });
+    }
+
     void add_pooling_op()
     {
         apply_map.emplace("pooling", [=](instruction_ref ins) {
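The new add_quant_dot_op moves workspace allocation to lowering time: the buffers are created once per quant_dot instruction while the program is compiled, then handed to the op by value. A schematic standalone version of that pattern (std types replace instruction_ref, allocate_gpu, and MIGraphX's apply_map):

    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Stand-in for the lowered op: it receives its workspaces fully formed.
    struct quant_gemm_like
    {
        std::vector<char> arg_a;
        std::vector<char> arg_b;
    };

    int main()
    {
        std::unordered_map<std::string, std::function<quant_gemm_like()>> apply_map;
        apply_map.emplace("quant_dot", [] {
            // Allocate the packing workspaces up front, at lowering time...
            auto arg_a = std::vector<char>(64); // allocate_gpu(in_shapes[0]) in the real code
            auto arg_b = std::vector<char>(64); // allocate_gpu(in_shapes[1]) in the real code
            // ...and hand them to the op by aggregate initialization.
            return quant_gemm_like{arg_a, arg_b};
        });
        auto op = apply_map.at("quant_dot")();
        return (op.arg_a.size() == 64 && op.arg_b.size() == 64) ? 0 : 1;
    }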
@@ -57,26 +57,6 @@ shape miopen_quant_gemm::compute_shape(const std::vector<shape>& inputs) const
     std::vector<shape> input_shapes(inputs);
     input_shapes.pop_back();
     check_shapes{input_shapes}.not_broadcasted();
-    bool transa = inputs[0].transposed();
-    bool transb = inputs[1].transposed();
-    if(!transb)
-    {
-        if(arg_b.empty())
-        {
-            auto* p_this  = const_cast<miopen_quant_gemm*>(this);
-            p_this->arg_b = allocate_gpu(inputs[1]);
-        }
-    }
-    if(transa)
-    {
-        if(arg_a.empty())
-        {
-            auto* p_this  = const_cast<miopen_quant_gemm*>(this);
-            p_this->arg_a = allocate_gpu(inputs[0]);
-        }
-    }
     return op.compute_shape(input_shapes);
 }
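The deleted block above is presumably what cppcheck objected to: a const member function mutating arg_a and arg_b through a const_cast of `this`. Besides triggering the warning, the lazy allocation made compute_shape stateful and non-reentrant. A condensed before/after sketch with generic types:

    #include <vector>

    struct workspace_op
    {
        std::vector<char> arg_b;

        // Before: cppcheck flags the const_cast, and the "const" method
        // silently mutates the object on first call.
        int compute_shape_before(int n) const
        {
            if(arg_b.empty())
            {
                auto* p_this = const_cast<workspace_op*>(this);
                p_this->arg_b.resize(static_cast<std::size_t>(n));
            }
            return n;
        }

        // After: the workspace is allocated by the caller at construction,
        // so the method is genuinely const.
        int compute_shape_after(int n) const { return n; }
    };

    int main()
    {
        workspace_op op{std::vector<char>(8)}; // buffer supplied up front
        return op.compute_shape_after(8) == 8 ? 0 : 1;
    }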