Commit 28ab5f76 authored by Shucai Xiao

fix cppcheck error.

parent cade36d1
@@ -13,18 +13,18 @@ namespace device {
 void pack_a(hipStream_t stream, const argument& result, const argument& arg)
 {
-    auto output_shape = result.get_shape();
-    auto out_lens     = output_shape.lens();
+    auto comp_shape = arg.get_shape();
+    auto out_lens   = comp_shape.lens();
     auto dim_0      = out_lens.size() - 2;
     auto dim_1      = out_lens.size() - 1;
-    std::size_t lda = output_shape.strides()[dim_0];
+    std::size_t lda = comp_shape.strides()[dim_0];
     std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
     visit_all(result, arg)([&](auto output, auto input) {
-        std::size_t nelements = output_shape.elements();
+        std::size_t nelements = comp_shape.elements();
         auto* out_ptr = device_cast(output.data());
         auto* in_ptr  = device_cast(input.data());
         visit_tensor_size(out_lens.size(), [&](auto out_dim) {
-            hip_tensor_descriptor<out_dim> desc(output_shape);
+            hip_tensor_descriptor<out_dim> desc(comp_shape);
             gs_launch(stream, nelements)([=](auto ii) {
                 const size_t nb = 4;
                 auto idx        = desc.multi(ii);
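Every change in this hunk serves the same rename: pack_a now derives its descriptor and launch bounds from the input argument's shape (comp_shape) rather than a separate result-side copy, and comp_shape.strides()[dim_0] supplies the leading dimension lda of the packed matrix. A minimal host-side sketch of that stride arithmetic, using a hand-rolled row-major stride helper instead of migraphx::shape (the lens values are made up for illustration):

#include <cstddef>
#include <iostream>
#include <vector>

// Row-major strides for the given dimension lengths, mirroring what a
// standard (packed) migraphx shape would report from strides().
std::vector<std::size_t> row_major_strides(const std::vector<std::size_t>& lens)
{
    std::vector<std::size_t> strides(lens.size(), 1);
    for(std::size_t i = lens.size() - 1; i > 0; --i)
        strides[i - 1] = strides[i] * lens[i];
    return strides;
}

int main()
{
    std::vector<std::size_t> lens = {2, 3, 4}; // e.g. batch x rows x cols
    auto strides = row_major_strides(lens);
    auto dim_0 = lens.size() - 2;
    auto dim_1 = lens.size() - 1;
    std::size_t lda    = strides[dim_0];            // 4: distance between rows
    std::size_t m_size = lens[dim_0] * lens[dim_1]; // 12: elements per matrix
    std::cout << "lda=" << lda << " m_size=" << m_size << "\n";
}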
@@ -40,7 +40,7 @@ void pack_a(hipStream_t stream, const argument& result, const argument& arg)
 void pack_b(hipStream_t stream, const argument& result, const argument& arg)
 {
-    auto trans_shape = result.get_shape();
+    auto trans_shape = arg.get_shape();
     auto out_lens = trans_shape.lens();
     auto dim_0    = trans_shape.lens().size() - 2;
     auto dim_1    = trans_shape.lens().size() - 1;
@@ -48,14 +48,14 @@ void pack_b(hipStream_t stream, const argument& result, const argument& arg)
     auto wrap_lens = out_lens;
     std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
-    shape output_shape{trans_shape.type(), wrap_lens};
+    shape comp_shape{trans_shape.type(), wrap_lens};
     std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
     visit_all(result, arg)([&](auto output, auto input) {
-        std::size_t nelements = output_shape.elements();
+        std::size_t nelements = comp_shape.elements();
         auto* out_ptr = device_cast(output.data());
         auto* in_ptr  = device_cast(input.data());
         visit_tensor_size(out_lens.size(), [&](auto out_dim) {
-            hip_tensor_descriptor<out_dim> desc(output_shape);
+            hip_tensor_descriptor<out_dim> desc(comp_shape);
             gs_launch(stream, nelements)([=](auto ii) {
                 const size_t nb = 4;
                 auto idx        = desc.multi(ii);
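pack_b iterates over a deliberately transposed geometry: wrap_lens swaps the last two dimension lengths, and comp_shape is a fresh standard-strided shape over those swapped lengths, so the kernel's descriptor walks the matrix in transposed order. A tiny sketch of just the swap step (plain std::vector stand-ins rather than the shape API):

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
    std::vector<std::size_t> out_lens = {2, 3, 4}; // illustrative lens
    auto dim_0 = out_lens.size() - 2;
    auto dim_1 = out_lens.size() - 1;

    auto wrap_lens = out_lens;                     // copy, as in pack_b
    std::swap(wrap_lens[dim_0], wrap_lens[dim_1]); // now {2, 4, 3}
    for(auto l : wrap_lens)
        std::cout << l << ' ';                     // prints: 2 4 3
    std::cout << '\n';
}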
@@ -13,8 +13,8 @@ struct context;
 struct miopen_quant_gemm
 {
     op::quant_dot op;
-    miopen_quant_gemm(op::quant_dot qop) : op(qop) {}
+    argument arg_a;
+    argument arg_b;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -31,9 +31,6 @@ struct miopen_quant_gemm
         return shapes.size() - 1;
     }
-    private:
-    argument arg_a;
-    argument arg_b;
 };
 } // namespace gpu
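The header change is the heart of the fix: the single-argument constructor is gone and arg_a/arg_b move out of the private section, which turns miopen_quant_gemm into an aggregate. That is what lets lowering.cpp below write miopen_quant_gemm{op, arg_a, arg_b} and hand the pre-allocated buffers in at construction time. A minimal sketch of the distinction, with stand-in types rather than the MIGraphX headers:

#include <string>

// Before: a user-declared constructor means the type is not an aggregate,
// so only one member can be set at construction; the rest must be filled
// in later (which is what the const_cast further below used to do).
struct with_ctor
{
    int op;
    explicit with_ctor(int o) : op(o) {}
    std::string arg_a;
    std::string arg_b;
};

// After: all members public, no constructor -- an aggregate that mirrors
// the new miopen_quant_gemm layout (int/std::string stand in for
// op::quant_dot and argument).
struct aggregate_like
{
    int op;
    std::string arg_a;
    std::string arg_b;
};

int main()
{
    // with_ctor w{1, "a", "b"};   // would not compile: no 3-argument constructor
    aggregate_like g{1, "a", "b"}; // ok: aggregate initialization, as in lowering.cpp
    return g.op == 1 ? 0 : 1;
}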
@@ -99,7 +99,6 @@ struct miopen_apply
         add_generic_op<hip_min>("min");
         add_extend_op<miopen_gemm, op::dot>("dot");
-        add_extend_op<miopen_quant_gemm, op::quant_dot>("quant_dot");
         add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
         add_extend_op<hip_concat, op::concat>("concat");
         add_extend_op<miopen_softmax, op::softmax>("softmax");
@@ -112,6 +111,7 @@ struct miopen_apply
         add_lrn_op();
         add_convolution_op();
         add_quant_convolution_op();
+        add_quant_dot_op();
         add_pooling_op();
         add_batch_norm_inference_op();
     }
@@ -174,6 +174,23 @@ struct miopen_apply
         });
     }

+    void add_quant_dot_op()
+    {
+        apply_map.emplace("quant_dot", [=](instruction_ref ins) {
+            auto&& op      = any_cast<op::quant_dot>(ins->get_operator());
+            auto inputs    = ins->inputs();
+            auto in_shapes = to_shapes(inputs);
+            auto arg_a     = allocate_gpu(in_shapes[0]);
+            auto arg_b     = allocate_gpu(in_shapes[1]);
+            auto quant_dot = miopen_quant_gemm{op, arg_a, arg_b};
+            auto output    = insert_allocation(ins, ins->get_shape());
+            inputs.push_back(output);
+            return prog->replace_instruction(ins, quant_dot, inputs);
+        });
+    }
+
     void add_pooling_op()
     {
         apply_map.emplace("pooling", [=](instruction_ref ins) {
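add_quant_dot_op follows the registration pattern shared by the other add_*_op helpers: it stores a lambda in apply_map under the operator's name, and when lowering visits a quant_dot instruction it allocates the packed input buffers up front, builds the gpu op with them, appends the output allocation, and replaces the instruction in place. A stripped-down sketch of that name-to-handler dispatch (the map and handler bodies here are illustrative, not the miopen_apply internals):

#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

int main()
{
    // Handlers keyed by operator name; each rewrites "its" instruction kind.
    std::unordered_map<std::string, std::function<void()>> apply_map;
    apply_map.emplace("quant_dot", [] {
        std::cout << "allocate arg_a/arg_b, build miopen_quant_gemm, replace ins\n";
    });
    apply_map.emplace("pooling", [] { std::cout << "lower pooling\n"; });

    // Lowering walks the program and dispatches on each instruction's name.
    apply_map.at("quant_dot")();
}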
@@ -57,26 +57,6 @@ shape miopen_quant_gemm::compute_shape(const std::vector<shape>& inputs) const
     std::vector<shape> input_shapes(inputs);
     input_shapes.pop_back();
     check_shapes{input_shapes}.not_broadcasted();
-    bool transa = inputs[0].transposed();
-    bool transb = inputs[1].transposed();
-    if(!transb)
-    {
-        if(arg_b.empty())
-        {
-            auto* p_this  = const_cast<miopen_quant_gemm*>(this);
-            p_this->arg_b = allocate_gpu(inputs[1]);
-        }
-    }
-    if(transa)
-    {
-        if(arg_a.empty())
-        {
-            auto* p_this  = const_cast<miopen_quant_gemm*>(this);
-            p_this->arg_a = allocate_gpu(inputs[0]);
-        }
-    }
     return op.compute_shape(input_shapes);
 }
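The deleted block is the most likely target of the cppcheck complaint: compute_shape is a const member function, yet it used const_cast on this to allocate arg_a and arg_b lazily, mutating a logically const object. With the buffers now allocated in add_quant_dot_op and passed in at construction, the cast disappears. The exact diagnostic text is not part of the commit, so the following is a reconstruction of the flagged pattern, not a quote of the cppcheck output:

struct lazy_buffer
{
    int cache = 0;

    // A const member function that mutates the object through const_cast:
    // the shape of code static analyzers object to, and what this commit
    // removes by allocating the buffers up front during lowering instead.
    int value() const
    {
        if(cache == 0)
        {
            auto* p_this  = const_cast<lazy_buffer*>(this);
            p_this->cache = 42; // undefined behavior if *this really is const
        }
        return cache;
    }
};

int main()
{
    lazy_buffer b;
    return b.value() == 42 ? 0 : 1;
}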