Commit 4b0c3753 authored by Shucai Xiao

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into rewrite_sigmoid_to_hip_kernel
parents c546c166 de1d5919
@@ -125,6 +125,8 @@ rocm_enable_cppcheck(
     functionConst:*program.*
     shadowFunction
     shadowVar
+    shadowVariable
+    unsafeClassDivZero
     definePrefix:*test/include/test.hpp
     FORCE
     INCONCLUSIVE
...
@@ -99,13 +99,14 @@ rocmtest tidy: rocmnode('rocmtest') { cmake_build ->
         | xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-5.0 -style=file {} | diff - {}\'
     '''
     }
-}, clang: rocmnode('vega') { cmake_build ->
+}, clang_debug: rocmnode('vega') { cmake_build ->
     stage('Clang Debug') {
-        // TODO: Enanle integer
+        // TODO: Enable integer
         def sanitizers = "undefined"
         def debug_flags = "-g -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
         cmake_build("hcc", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'")
     }
+}, clang_release: rocmnode('vega') { cmake_build ->
     stage('Clang Release') {
         cmake_build("hcc", "-DCMAKE_BUILD_TYPE=release")
     }
...
 pfultz2/rocm-recipes
-danmar/cppcheck@8aa68ee297c2d9ebadf5bcfd00c66ea8d9291e35 -DHAVE_RULES=1
+danmar/cppcheck@ef714225bb31e9a76ac2484796763572386955ae -DHAVE_RULES=1
 ROCm-Developer-Tools/HIP@2490e42baa7d90458f0632fd9fbead2d395f41b9
 python/cpython@v3.6.6 -X autotools -H sha256:92aa914572c695c0aeb01b0a214813f414da4b51a371234df514a74761f2bb36
 -f requirements.txt
@@ -36,7 +36,7 @@ struct argument : raw_data<argument>
     }
     /// Provides a raw pointer to the data
-    std::function<char*()> data;
+    std::function<char*()> data = nullptr;
     /// Whether data is available
     bool empty() const { return not data; }
...
@@ -353,14 +353,14 @@ MIGRAPHX_PRED_MATCHER(same_input_shapes, instruction_ref ins)
         ins->inputs().begin(), ins->inputs().end(), [&](auto x) { return x->get_shape() == s; });
 }
 
-MIGRAPHX_BASIC_MATCHER(output, matcher_context& ctx, instruction_ref ins)
+MIGRAPHX_BASIC_MATCHER(output, const matcher_context& ctx, instruction_ref ins)
 {
     if(ins->outputs().size() == 1)
         return ins->outputs().front();
     return ctx.not_found();
 }
 
-MIGRAPHX_BASIC_MATCHER(used_once, matcher_context& ctx, instruction_ref ins)
+MIGRAPHX_BASIC_MATCHER(used_once, const matcher_context& ctx, instruction_ref ins)
 {
     if(ins->outputs().size() == 1)
         return ins;
@@ -411,7 +411,7 @@ inline auto nargs(std::size_t n)
 inline auto arg(std::size_t i)
 {
-    return make_basic_fun_matcher([=](matcher_context& ctx, instruction_ref ins) {
+    return make_basic_fun_matcher([=](const matcher_context& ctx, instruction_ref ins) {
         if(i < ins->inputs().size())
             return ins->inputs()[i];
         return ctx.not_found();
...
#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#include <array>
#include <migraphx/op/common.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct quant_convolution
{
std::array<std::size_t, 2> padding = {{0, 0}};
std::array<std::size_t, 2> stride = {{1, 1}};
std::array<std::size_t, 2> dilation = {{1, 1}};
padding_mode_t padding_mode = default_;
int group = 1;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.padding, "padding"),
f(self.stride, "stride"),
f(self.dilation, "dilation"),
f(self.padding_mode, "padding_mode"),
f(self.group, "group"));
}
std::string name() const { return "quant_convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2).same_type().same_ndims().only_dims(4);
const shape& input = inputs.at(0);
const shape& weights = inputs.at(1);
auto t = input.type();
        // all inputs must be of int8_type; the output is int32_type
if(t != shape::int8_type)
{
MIGRAPHX_THROW("QUANT_CONVOLUTION: only accept input and weights of type int8_t");
}
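        // int8 products are accumulated in 32 bits, hence the int32 result type below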
t = shape::int32_type;
return {t,
{
input.lens()[0],
weights.lens()[0],
std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[2] - (1 + dilation[0] * (weights.lens()[2] - 1)) +
2 * padding[0]) /
stride[0] +
1)),
std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[3] - (1 + dilation[1] * (weights.lens()[3] - 1)) +
2 * padding[1]) /
stride[1] +
1)),
}};
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
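
A quick hypothetical walk-through of compute_shape (the shapes below are invented for illustration and do not appear in this commit): an int8 NCHW input of {1, 3, 32, 32} with weights {16, 3, 3, 3}, padding {1, 1}, stride {1, 1}, and dilation {1, 1} yields an int32 output of {1, 16, 32, 32}, since each spatial dimension works out to max(1, (32 - (1 + 1 * (3 - 1)) + 2 * 1) / 1 + 1) = 32:

// hypothetical usage sketch of op::quant_convolution::compute_shape
migraphx::shape input{migraphx::shape::int8_type, {1, 3, 32, 32}};
migraphx::shape weights{migraphx::shape::int8_type, {16, 3, 3, 3}};
migraphx::op::quant_convolution conv{{1, 1}, {1, 1}, {1, 1}}; // padding, stride, dilation
auto out = conv.compute_shape({input, weights}); // {int32_type, {1, 16, 32, 32}}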
#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_DOT_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_DOT_HPP
#include <array>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct quant_dot
{
int32_t alpha = 1;
int32_t beta = 1;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(as_number(self.alpha), "alpha"), f(as_number(self.beta), "beta"));
}
std::string name() const { return "quant_dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type();
const shape& a = inputs.at(0);
const shape& b = inputs.at(1);
auto t = a.type();
if(t != shape::int8_type)
{
MIGRAPHX_THROW("QUANT_DOT: only support data type int8_t");
}
if(!std::all_of(inputs.begin(), inputs.end(), [](auto s) { return s.lens().size() >= 2; }))
{
MIGRAPHX_THROW("QUANT_DOT: dot only accept 2 or more dims operands");
}
        // only handle the case where the batch sizes of a and b are the same
if(!std::equal(
a.lens().rbegin() + 2, a.lens().rend(), b.lens().rbegin() + 2, b.lens().rend()))
{
MIGRAPHX_THROW("QUANT_DOT: batch size of A and B mismatch: {" +
to_string_range(a.lens()) + "} x {" + to_string_range(b.lens()) + "}");
}
std::size_t dim_0 = a.lens().size() - 2;
std::size_t dim_1 = a.lens().size() - 1;
if(a.lens()[dim_1] != b.lens()[dim_0])
{
MIGRAPHX_THROW("QUANT_DOT: inner dimensions do not match: {" +
to_string_range(a.lens()) + "} x {" + to_string_range(b.lens()) + "}");
}
        // the inner dimension k must be a multiple of 4
        if((a.lens()[dim_1] % 4) != 0)
        {
            MIGRAPHX_THROW("QUANT_DOT: inner dimension of A {" + to_string_range(a.lens()) +
                           "} and B {" + to_string_range(b.lens()) +
                           "} must be a multiple of 4 for the int8 type");
}
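        // the output keeps a's batch and row dimensions and takes b's column dimension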
auto out_lens = a.lens();
out_lens[dim_1] = b.lens()[dim_1];
if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
{
MIGRAPHX_THROW("QUANT_DOT: dimension mismatch, operand C: {" +
to_string_range(inputs.at(2).lens()) +
"}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
}
if(inputs.size() == 3 && inputs.at(2).type() != shape::int32_type)
{
MIGRAPHX_THROW("QUANT_DOT: operand C type must be int32");
}
return {shape::int32_type, out_lens};
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
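
A similar hypothetical example for quant_dot (dimensions invented for illustration): with int8 operands A = {2, 4, 8} and B = {2, 8, 6}, the batch dimensions {2} match, the inner dimensions agree (8 == 8), and k = 8 is a multiple of 4, so compute_shape returns {int32_type, {2, 4, 6}}:

// hypothetical usage sketch of op::quant_dot::compute_shape
migraphx::shape a{migraphx::shape::int8_type, {2, 4, 8}};
migraphx::shape b{migraphx::shape::int8_type, {2, 8, 6}};
migraphx::op::quant_dot qdot; // alpha = 1, beta = 1 by default
auto out = qdot.compute_shape({a, b}); // {int32_type, {2, 4, 6}}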
@@ -45,6 +45,8 @@
 #include <migraphx/op/outline.hpp>
 #include <migraphx/op/pad.hpp>
 #include <migraphx/op/pooling.hpp>
+#include <migraphx/op/quant_convolution.hpp>
+#include <migraphx/op/quant_dot.hpp>
 #include <migraphx/op/pow.hpp>
 #include <migraphx/op/reduce_sum.hpp>
 #include <migraphx/op/reduce_mean.hpp>
...
@@ -168,6 +168,7 @@ bool verify_range(R1&& r1, R2&& r2, double tolerance = 80, double* out_error = nullptr)
 {
     double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance;
     auto error = rms_range(r1, r2);
+    // cppcheck-suppress uninitvar
     if(out_error != nullptr)
         *out_error = error;
     return error <= threshold;
...
@@ -1011,9 +1011,10 @@ struct onnx_parser
     }
 
     std::vector<operation> vec_actv_funcs(vec_names.size());
-    std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& fn) {
-        return map_actv_funcs[fn];
-    });
+    std::transform(vec_names.begin(),
+                   vec_names.end(),
+                   vec_actv_funcs.begin(),
+                   [&](const auto& fn) { return map_actv_funcs[fn]; });
 
     // To be added later
     float clip = 0.0;
@@ -1127,9 +1128,10 @@ struct onnx_parser
     }
 
     std::vector<operation> vec_actv_funcs(vec_names.size());
-    std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& name) {
-        return map_actv_funcs[name];
-    });
+    std::transform(vec_names.begin(),
+                   vec_names.end(),
+                   vec_actv_funcs.begin(),
+                   [&](const auto& name) { return map_actv_funcs[name]; });
 
     float clip = 0.0;
     if(contains(attributes, "clip"))
@@ -1299,9 +1301,10 @@ struct onnx_parser
     }
 
     std::vector<operation> vec_actv_funcs(vec_names.size());
-    std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& name) {
-        return map_actv_funcs[name];
-    });
+    std::transform(vec_names.begin(),
+                   vec_names.end(),
+                   vec_actv_funcs.begin(),
+                   [&](const auto& name) { return map_actv_funcs[name]; });
 
     float clip = 0.0;
     if(contains(attributes, "clip"))
...
@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
         offset += (element_size - (offset % element_size));
         conflict_queue.pop();
     }
+    // when int8 type is used, the offset could be any number;
+    // if not 4-byte aligned, miopen int8 convolution can crash
+    offset = (offset + 3) / 4 * 4;
     segment.offset = offset;
     MIGRAPHX_DEBUG(segment.dump());
     required_bytes = std::max(required_bytes, offset + segment.size);
...
@@ -107,7 +107,7 @@ struct memory_coloring_impl
         return ins->name() == "check_context";
     }
 
-    static bool is_disjoin(live_range& range1, live_range& range2)
+    static bool is_disjoin(const live_range& range1, const live_range& range2)
     {
         if((range1.size == 0) || (range2.size == 0))
             return false;
...
@@ -241,7 +241,7 @@ instruction_ref program::remove_instructions(instruction_ref first, instruction_ref last)
     // TODO: Check every element
     assert(has_instruction(first));
     std::for_each(first, last, [&](instruction& ins) { ins.clear_arguments(); });
-    assert(std::all_of(first, last, [&](instruction& ins) { return ins.outputs().empty(); }));
+    assert(std::all_of(first, last, [&](const instruction& ins) { return ins.outputs().empty(); }));
     return impl->instructions.erase(first, last);
 }
...
@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
     std::vector<float> ihc_data(ihc_shape.elements(), 0.0);
 
     migraphx::shape pph_shape{type, {1, 3 * hidden_size}};
-    std::vector<float> pph_data(pph_shape.elements(), 0.0);
 
     auto actv_funcs = lstm_actv_funcs(ins);
     auto lstm_op = any_cast<op::lstm>(ins->get_operator());
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
 {
 };
 
-template <class T>
-void migemm_impl(tensor_view<T> cmat,
-                 tensor_view<T> amat,
-                 tensor_view<T> bmat,
-                 float alpha,
-                 float beta,
-                 std::true_type)
+template <class T, class F>
+void migemm_impl(
+    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta, std::true_type)
 {
     visit_mat(amat, [&](const auto& a) {
         visit_mat(bmat, [&](const auto& b) {
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
     });
 }
 
-template <class T>
-void migemm_impl(tensor_view<T> cmat,
-                 tensor_view<T> amat,
-                 tensor_view<T> bmat,
-                 float alpha,
-                 float beta,
-                 std::false_type)
+template <class T, class F>
+void migemm_impl(
+    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta, std::false_type)
 {
     std::size_t n_dims = cmat.get_shape().lens().size();
     std::size_t dim_0  = n_dims - 2;
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
     });
 }
 
-template <class T>
-void migemm_impl(
-    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, float alpha, float beta)
+template <class T, class F>
+void migemm_impl(tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta)
 {
     auto lens = amat.get_shape().lens();
     bool batch_mul =
@@ -113,13 +104,29 @@ void migemm_impl(
     }
 }
 
-void migemm(
-    const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta)
+template <class F>
+void migemm_tpl(
+    const argument& c_arg, const argument& a_arg, const argument& b_arg, F alpha, F beta)
 {
     visit_all(c_arg, a_arg, b_arg)(
         [&](auto cmat, auto amat, auto bmat) { migemm_impl(cmat, amat, bmat, alpha, beta); });
 }
 
+void migemm(
+    const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta)
+{
+    migemm_tpl(c_arg, a_arg, b_arg, alpha, beta);
+}
+
+void migemm(const argument& c_arg,
+            const argument& a_arg,
+            const argument& b_arg,
+            int32_t alpha,
+            int32_t beta)
+{
+    migemm_tpl(c_arg, a_arg, b_arg, alpha, beta);
+}
+
 } // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -10,6 +10,11 @@ namespace cpu {
 
 void migemm(
     const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
+void migemm(const argument& c_arg,
+            const argument& a_arg,
+            const argument& b_arg,
+            int32_t alpha,
+            int32_t beta);
 
 } // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
...
@@ -4,7 +4,9 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/batch_norm.hpp>
 #include <migraphx/op/convolution.hpp>
+#include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
+#include <migraphx/op/quant_dot.hpp>
 #include <migraphx/op/elu.hpp>
 #include <migraphx/op/im2col.hpp>
 #include <migraphx/op/leaky_relu.hpp>
@@ -216,6 +218,60 @@ struct cpu_convolution
     }
 };
struct cpu_quant_convolution
{
op::quant_convolution op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::quant_convolution"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
auto output = result.get<int32_t>();
visit_all(args[0], args[1])([&](auto input, auto weights) {
auto in = input.get_shape().lens();
auto in_h = in[2];
auto in_w = in[3];
auto wei = weights.get_shape().lens();
auto wei_n = wei[0];
auto wei_c = wei[1];
auto wei_h = wei[2];
auto wei_w = wei[3];
par_dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
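                    // start_x/start_y: top-left corner of the input window for output pixel (i, j)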
const auto start_x = i * op.stride[0] - op.padding[0];
const auto start_y = j * op.stride[1] - op.padding[1];
const auto group_id = w / (wei_n / op.group);
int32_t acc = 0;
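                    // accumulate int8 * int8 products in a 32-bit accumulator to avoid overflow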
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const auto in_x = start_x + x;
const auto in_y = start_y + y;
const auto in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
}
});
output(o, w, i, j) = acc;
});
});
return result;
}
};
 struct cpu_im2col
 {
     op::im2col op;
@@ -245,17 +301,17 @@ struct cpu_im2col
         const std::size_t& stride_h = op.stride[0];
         const std::size_t& stride_w = op.stride[1];
 
-        auto kdiv2_h = kernel_h / 2;
-        auto kdiv2_w = kernel_w / 2;
+        long kdiv2_h = long(kernel_h) / 2;
+        long kdiv2_w = long(kernel_w) / 2;
 
         // calculate output sizes
         const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
         const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
 
         // account for padding for the starting position of the input pixels
-        std::size_t iinput = kdiv2_h - pad_h;
+        long iinput = kdiv2_h - long(pad_h);
 
         // loop over output pixels (ioutput, joutput)
         for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
         {
-            std::size_t jinput = kdiv2_w - pad_w;
+            long jinput = kdiv2_w - long(pad_w);
             for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
             {
                 // compute linear index for output
@@ -264,8 +320,8 @@ struct cpu_im2col
                 dfor(channels,
                      kernel_h,
                      kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
-                    auto idx = iinput + koffset - kdiv2_h;
-                    auto jdx = jinput + loffset - kdiv2_w;
+                    auto idx = iinput + long(koffset) - kdiv2_h;
+                    auto jdx = jinput + long(loffset) - kdiv2_w;
                     col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                       ? input(0, c, idx, jdx)
                                       : 0;
@@ -433,7 +489,7 @@ struct cpu_gemm
     {
         argument result{output_shape};
         // 3 inputs, it is alpha * A * B + beta * C, then
-        // A and B are matrics, and C is broadcastable to A * B
+        // A and B are matrices, and C is of the same shape as A * B
         if(args.size() == 3)
         {
             // no need to consider the value of args[2]
@@ -460,13 +516,80 @@ struct cpu_gemm
     }
 };
struct cpu_quant_gemm
{
op::quant_dot op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::quant_dot"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
if(inputs.size() == 3)
{
auto c_shape = inputs.at(2);
check_shapes{{c_shape}}.not_broadcasted();
}
return op.compute_shape(inputs);
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
        // with 3 inputs the result is alpha * A * B + beta * C, where
        // A and B are matrices and C has the same shape as A * B
// first, convert the args[0] and args[1] from int8_t to int32_t
argument arg_0{{shape::int32_type, {args.at(0).get_shape().lens()}}};
argument arg_1{{shape::int32_type, {args.at(1).get_shape().lens()}}};
arg_0.visit([&](auto output) {
args.at(0).visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
arg_1.visit([&](auto output) {
args.at(1).visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
if(args.size() == 3)
{
// no need to consider the value of args[2]
if(op.beta == 0)
{
result.visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
}
else
{
visit_all(result, args[2])([&](auto output, auto input) {
std::copy(input.begin(), input.end(), output.begin());
});
}
migemm(result, arg_0, arg_1, op.alpha, op.beta);
return result;
}
// 2 input arguments
int32_t beta = 0;
migemm(result, arg_0, arg_1, op.alpha, beta);
return result;
}
};
 struct leaky_relu_op
 {
     op::leaky_relu op;
     std::string name() const { return "cpu::leaky_relu"; }
     auto fcn() const
     {
-        auto& a = op.alpha;
+        auto a = op.alpha;
         return [a](auto x) { return x > 0 ? x : x * a; };
     }
 };
@@ -477,7 +600,7 @@ struct elu_op
     std::string name() const { return "cpu::elu"; }
     auto fcn() const
     {
-        auto& a = op.alpha;
+        auto a = op.alpha;
         return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
     }
 };
@@ -671,15 +794,17 @@ struct cpu_apply
     {
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
-        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
-        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
-        apply_map["elu"]         = extend_op<cpu_unary<elu_op>, op::elu>();
-        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
-        apply_map["leaky_relu"]  = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
-        apply_map["logsoftmax"]  = extend_op<cpu_logsoftmax, op::logsoftmax>();
-        apply_map["lrn"]         = extend_op<cpu_lrn, op::lrn>();
-        apply_map["pad"]         = extend_op<cpu_pad, op::pad>();
-        apply_map["softmax"]     = extend_op<cpu_softmax, op::softmax>();
+        apply_map["convolution"]       = extend_op<cpu_convolution, op::convolution>();
+        apply_map["dot"]               = extend_op<cpu_gemm, op::dot>();
+        apply_map["quant_dot"]         = extend_op<cpu_quant_gemm, op::quant_dot>();
+        apply_map["quant_convolution"] = extend_op<cpu_quant_convolution, op::quant_convolution>();
+        apply_map["elu"]               = extend_op<cpu_unary<elu_op>, op::elu>();
+        apply_map["im2col"]            = extend_op<cpu_im2col, op::im2col>();
+        apply_map["leaky_relu"]        = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
+        apply_map["logsoftmax"]        = extend_op<cpu_logsoftmax, op::logsoftmax>();
+        apply_map["lrn"]               = extend_op<cpu_lrn, op::lrn>();
+        apply_map["pad"]               = extend_op<cpu_pad, op::pad>();
+        apply_map["softmax"]           = extend_op<cpu_softmax, op::softmax>();
     }
 
     void apply()
...
@@ -40,6 +40,7 @@ add_library(migraphx_device
     device/pad.cpp
     device/gather.cpp
     device/sub.cpp
+    device/int8_gemm_pack.cpp
     device/div.cpp
     device/clip.cpp
     device/reduce_sum.cpp
@@ -65,8 +66,10 @@ add_library(migraphx_gpu
     target.cpp
     lowering.cpp
     gemm.cpp
+    quant_gemm.cpp
     pooling.cpp
     convolution.cpp
+    quant_convolution.cpp
     softmax.cpp
     logsoftmax.cpp
     contiguous.cpp
@@ -82,9 +85,12 @@ add_library(migraphx_gpu
     lrn.cpp
     schedule_model.cpp
     adjust_allocation.cpp
+    pack_int8_args.cpp
     clip.cpp
     reduce_sum.cpp
     reduce_mean.cpp
+    int8_gemm_pack.cpp
+    int8_conv_pack.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)
...
@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
 __device__ inline void dpp_reduce(float& x, sum)
 {
-#ifdef MIGRAPHX_USE_CLANG_TIDY
-    (void)x;
+#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK)
+    x = 1;
 #else
     __asm__ volatile("s_nop 4\n"
                      "v_add_f32 %0 %0 %0 row_shr:1\n"
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
         result[is] = tidx / strides[is];
         tidx = tidx % strides[is];
     }
+
     return result;
 }
 __device__ __host__ std::size_t linear(hip_tensor_index<NDim> s) const
...