Commit 0369e974 authored by Khalique Ahmed

Merge branch 'batch_report' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into mi100_opts

parents 3a474fca d70fd0df
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#include <migraphx/requires.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
template <class... Ts>
using common_type = typename std::common_type<Ts...>::type;
template <class T, MIGRAPHX_REQUIRES(is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
return std::isfinite(x) and std::isfinite(y) and
std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}
template <class T, MIGRAPHX_REQUIRES(not is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
return x == y;
}
template <class T, class U>
__device__ bool float_equal(T x, U y)
{
return float_equal_device<common_type<T, U>>(x, y);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
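For reference, the same nextafter-bracketing check can be sketched on the host: two values compare equal when one lies within a single representable step of the other. float_equal_host below is a hypothetical standalone analogue, not part of this commit, and it omits the __fp16 trait handling added in types.hpp further down.

// Hypothetical host-side analogue of float_equal_device above.
#include <cmath>
#include <iostream>
#include <limits>

template <class T>
bool float_equal_host(T x, T y)
{
    // Equal when y lies between the two values adjacent to x (one ULP tolerance).
    return std::isfinite(x) and std::isfinite(y) and
           std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
           std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}

int main()
{
    std::cout << std::boolalpha;
    std::cout << (0.1 + 0.2 == 0.3) << "\n";                // false: one ULP apart
    std::cout << float_equal_host(0.1 + 0.2, 0.3) << "\n";  // true
    std::cout << float_equal_host(1.0, 1.001) << "\n";      // false
}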
@@ -129,6 +129,21 @@ __device__ __host__ T to_hip_type(T x)
// Hip doesn't support __fp16
inline __device__ __host__ float to_hip_type(gpu_half x) { return x; }
#define MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(trait, T) \
template <class X> \
struct trait : std::trait<X> \
{ \
}; \
\
template <> \
struct trait<T> : std::true_type \
{ \
};
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, __fp16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, __fp16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, __fp16)
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
...
#include <migraphx/gpu/device/nonzero.hpp>
#include <migraphx/gpu/device/float_equal.hpp>
#include <migraphx/gpu/device/scan.hpp>
#include <migraphx/gpu/device/reduce_ops.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data)
{
auto s = arg_data.get_shape();
auto elem_num = s.elements();
auto out_elem_num = result.get_shape().elements();
// Run a prefix sum over the is-nonzero mask to compute each nonzero
// element's index in the output. Only one block can be used since the
// whole input shares a single prefix sum.
const index_int block_size = 256;
hip_visit_all(arg_data, s)([&](auto input, auto si) {
const auto* in_ptr = device_cast(input.data());
auto* ptr = result.cast<int64_t>();
gs_launch(stream, block_size, block_size)([=](auto, auto idx) __device__ {
// zero-initialize the entire output first
idx.local_stride(out_elem_num, [&](auto j) { ptr[j] = 0; });
block_scan<block_size>(idx,
sum{},
0,
elem_num,
[&](auto j) { return (float_equal(in_ptr[j], 0)) ? 0 : 1; },
[&](auto j, auto x) {
auto out_loc = x - 1;
if(float_equal(in_ptr[j], 0))
return;
auto index = si.multi(j);
for(size_t k = 0; k < index.size(); ++k)
{
ptr[k * elem_num + out_loc] = index[k];
}
});
});
});
return result;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
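In the kernel above, block_scan produces an inclusive scan of the is-nonzero mask, and x - 1 is the output column for element j, so the multi-index of every nonzero element lands in a rank x elem_num layout. A host-side sketch of that indexing scheme, with a serial running counter standing in for the block scan (illustrative only, not part of the commit):

// Hypothetical host-side illustration of the nonzero indexing above.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int> in = {0, 3, 0, 5, 7, 0};  // flat input
    std::vector<std::size_t> dims = {2, 3};    // logical shape 2x3

    std::vector<std::int64_t> out(dims.size() * in.size(), 0);
    std::size_t scan = 0;                      // running count of nonzeros
    for(std::size_t j = 0; j < in.size(); ++j)
    {
        if(in[j] == 0)
            continue;
        std::size_t out_loc = scan++;          // column for this nonzero
        // convert the flat index j to a row-major multi-index
        std::size_t rem = j;
        for(std::size_t k = dims.size(); k > 0; --k)
        {
            out[(k - 1) * in.size() + out_loc] = rem % dims[k - 1];
            rem /= dims[k - 1];
        }
    }
    // print the first `scan` columns of each dimension row: (0,1) (1,0) (1,1)
    for(std::size_t k = 0; k < dims.size(); ++k)
    {
        for(std::size_t c = 0; c < scan; ++c)
            std::cout << out[k * in.size() + c] << ' ';
        std::cout << '\n';
    }
}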
@@ -5,6 +5,7 @@
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/add.hpp>
#include <migraphx/gpu/mul.hpp>
@@ -26,6 +27,7 @@
#include <migraphx/array.hpp>
#include <migraphx/op/clip.hpp>
#include <cmath>
#include <set>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -152,6 +154,12 @@ struct fusion
}
};
const std::unordered_set<std::string>& get_supported_archs()
{
static std::unordered_set<std::string> supported_archs{"gfx900", "gfx906", "gfx908", "gfx1030"};
return supported_archs;
}
MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
{
auto&& s = ins->get_shape();
@@ -161,6 +169,9 @@ MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
{
const auto device_name = split_string(get_device_name(), ':').front();
if(not contains(get_supported_archs(), device_name))
return false;
if(enabled(MIGRAPHX_DISABLE_MIOPEN_FUSION{}))
return false;
if(ins->name() != "gpu::convolution")
@@ -717,7 +728,7 @@ struct find_gemm_add
auto gemm = any_cast<rocblas_gemm<op::dot>>(gemm_ins->get_operator());
// Already fused gemm
-if(not float_equal(gemm.op.beta, 0))
+if(not float_equal(gemm.beta, 0))
return;
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto i) {
@@ -738,7 +749,7 @@ struct find_gemm_add
inputs.push_back(copy_ins);
inputs.push_back(copy_ins);
-gemm.op.beta = 1;
+gemm.beta = 1;
p.replace_instruction(ins, gemm, inputs);
}
};
...
@@ -15,6 +15,10 @@ namespace gpu {
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
std::string enum_params(std::size_t count, std::string param);
std::size_t compute_global(std::size_t n, std::size_t local = 1024);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
operation compile_roialign(context& ctx, const std::vector<shape>& io_shapes, const value& val);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
-#ifndef MIGRAPHX_GUARD_RTGLIB_REMAP_HPP
-#define MIGRAPHX_GUARD_RTGLIB_REMAP_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
+#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
-#include <string>
-#include <migraphx/instruction_ref.hpp>
+#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
+#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
-struct module;
-/**
-* Decompose operators.
-*/
-struct remap
-{
-std::string name() const { return "remap"; }
-void apply(module& p) const;
-};
+argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data);
+} // namespace device
+} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP
#include <migraphx/errors.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/value.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
@@ -19,13 +22,17 @@ template <class Op>
struct rocblas_gemm
{
Op op;
float alpha = 1;
float beta = 0;
bool int8_x4_format = true;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack_join(migraphx::reflect(self.op, f),
-pack(f(self.int8_x4_format, "int8_x4_format")));
+pack(f(self.alpha, "alpha"),
f(self.beta, "beta"),
f(self.int8_x4_format, "int8_x4_format")));
}
std::string name() const
@@ -44,6 +51,26 @@ struct rocblas_gemm
check_shapes{in_shapes, *this}.not_broadcasted();
batch_not_transposed(inputs[0].strides());
batch_not_transposed(inputs[1].strides());
// if gemm and add are fused
if(not float_equal(beta, 0))
{
auto cmat_shape = in_shapes.back();
in_shapes.pop_back();
auto op_out_shape = op.compute_shape(in_shapes);
if(cmat_shape.lens() != op_out_shape.lens())
{
MIGRAPHX_THROW(this->name() + " : dimension mismatch, operand C: {" +
to_string_range(cmat_shape.lens()) +
"}, cannot add to operand A * B: {" +
to_string_range(op_out_shape.lens()) + "}");
}
if(cmat_shape.type() != op_out_shape.type())
{
MIGRAPHX_THROW(this->name() + " : operand C type mismatch, operand C is of type: " +
to_string(cmat_shape.type()) +
", it must be: " + to_string(op_out_shape.type()));
}
}
return op.compute_shape(in_shapes);
}
@@ -51,7 +78,14 @@
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{
-gemm(ctx, output_shape, args, op.alpha, op.beta, int8_x4_format);
+if(this->name() == "gpu::gemm")
{
gemm(ctx, output_shape, args, alpha, beta, int8_x4_format);
}
else
{
gemm(ctx, output_shape, args, int32_t(alpha), int32_t(beta), int8_x4_format);
}
return args.back();
}
...
#ifndef MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP
#define MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/nonzero.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_nonzero
{
op::nonzero op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::nonzero"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -2,40 +2,51 @@
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_ARRAY_HPP
#include <migraphx/kernels/types.hpp>
+#include <migraphx/kernels/type_traits.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/debug.hpp>
namespace migraphx {
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op) \
-constexpr array& operator op(const array& x) \
-{ \
-for(index_int i = 0; i < N; i++) \
-d[i] op x[i]; \
-return *this; \
-} \
-constexpr array& operator op(const T& x) \
-{ \
-for(index_int i = 0; i < N; i++) \
-d[i] op x; \
-return *this; \
-} \
-friend constexpr array operator binary_op(const array& x, const array& y) \
-{ \
-auto z = x; \
-return z op y; \
-} \
-friend constexpr array operator binary_op(const array& x, const T& y) \
-{ \
-auto z = x; \
-return z op y; \
-} \
-friend constexpr array operator binary_op(const T& x, const array& y) \
-{ \
-for(index_int i = 0; i < N; i++) \
-y[i] = x op y[i]; \
-return y; \
+template <class U> \
+constexpr array& operator op(const array<U, N>& x) \
+{ \
+for(index_int i = 0; i < N; i++) \
+d[i] op x[i]; \
+return *this; \
+} \
+template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
+constexpr array& operator op(const U& x) \
+{ \
+for(index_int i = 0; i < N; i++) \
+d[i] op x; \
+return *this; \
+} \
+template <class U> \
+friend constexpr auto operator binary_op(const array& x, const array<U, N>& y) \
+{ \
+array<decltype(T {} binary_op U{}), N> z{}; \
+for(index_int i = 0; i < N; i++) \
+z[i] = x[i] binary_op y[i]; \
+return z; \
+} \
+template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
+friend constexpr auto operator binary_op(const array& x, const U& y) \
+{ \
+array<decltype(T {} binary_op U{}), N> z{}; \
+for(index_int i = 0; i < N; i++) \
+z[i] = x[i] binary_op y; \
+return z; \
+} \
+template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
+friend constexpr auto operator binary_op(const U& x, const array& y) \
+{ \
+array<decltype(T {} binary_op U{}), N> z{}; \
+for(index_int i = 0; i < N; i++) \
+z[i] = x binary_op y[i]; \
+return z; \
}
template <class T, index_int N>
...
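The reworked macro above widens the element type of mixed-type arithmetic to decltype(T {} binary_op U{}) instead of forcing the left operand's type. A standalone sketch of that promotion rule with a simplified vec type (hypothetical, not migraphx::array):

// Minimal standalone analogue of the widened array operators.
#include <cstddef>
#include <iostream>

template <class T, std::size_t N>
struct vec
{
    T d[N];
    constexpr T& operator[](std::size_t i) { return d[i]; }
    constexpr const T& operator[](std::size_t i) const { return d[i]; }
};

template <class T, class U, std::size_t N>
constexpr auto operator+(const vec<T, N>& x, const vec<U, N>& y)
{
    // Result element type follows the usual arithmetic promotion, e.g. float + int -> float.
    vec<decltype(T{} + U{}), N> z{};
    for(std::size_t i = 0; i < N; i++)
        z[i] = x[i] + y[i];
    return z;
}

int main()
{
    vec<float, 2> a{{1.5f, 2.5f}};
    vec<int, 2> b{{1, 2}};
    auto c = a + b;                               // vec<float, 2>
    std::cout << c[0] << ' ' << c[1] << '\n';     // 2.5 4.5
}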
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_BASIC_OPS_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_BASIC_OPS_HPP
#include <migraphx/kernels/types.hpp>
namespace migraphx {
struct sum
{
template <class T, class U>
constexpr auto operator()(T x, U y) const
{
return x + y;
}
};
struct product
{
template <class T, class U>
constexpr auto operator()(T x, U y) const
{
return x * y;
}
};
struct id
{
template <class T>
constexpr auto operator()(T x) const
{
return x;
}
};
struct mean
{
size_t item_num = 1;
template <class T>
constexpr auto operator()(T x) const
{
return x / static_cast<T>(item_num);
}
};
struct max_f
{
template <class T, class U>
constexpr auto operator()(T x, U y) const
{
return (x > y) ? x : y;
}
};
inline constexpr auto max = max_f{};
struct min_f
{
template <class T, class U>
constexpr auto operator()(T x, U y) const
{
return (x < y) ? x : y;
}
};
inline constexpr auto min = min_f{};
struct lowest
{
template <class T>
constexpr operator T() const
{
return std::numeric_limits<T>::lowest();
}
};
struct highest
{
template <class T>
constexpr operator T() const
{
return std::numeric_limits<T>::max();
}
};
} // namespace migraphx
#endif // MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_BASIC_OPS_HPP
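lowest and highest above rely on a templated conversion operator so a single object can serve as the init value for any element type. A standalone host-side sketch of the same trick (lowest_t and running_max are made-up names for illustration; the kernel version avoids std::numeric_limits on device):

// One object that converts to the "lowest" value of whatever type it is assigned to.
#include <iostream>
#include <limits>

struct lowest_t
{
    template <class T>
    constexpr operator T() const
    {
        return std::numeric_limits<T>::lowest();
    }
};

template <class T>
T running_max(const T* p, int n)
{
    T m = lowest_t{};  // converts to the right T automatically
    for(int i = 0; i < n; ++i)
        m = p[i] > m ? p[i] : m;
    return m;
}

int main()
{
    float f[] = {1.5f, -2.0f, 3.25f};
    int i[] = {4, -7, 2};
    std::cout << running_max(f, 3) << ' ' << running_max(i, 3) << '\n'; // 3.25 4
}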
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_DFOR_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_DFOR_HPP
namespace migraphx {
// Multidimensional for loop
inline constexpr auto dfor()
{
return [](auto f) { f(); };
}
template <class T, class... Ts>
constexpr auto dfor(T x, Ts... xs)
{
return [=](auto f) {
for(T i = 0; i < x; i++)
{
dfor(xs...)([&](Ts... is) { f(i, is...); });
}
};
}
} // namespace migraphx
#endif
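dfor builds a nested loop from a variadic pack of extents and hands every index tuple to a callback. A usage sketch, with the helper reproduced so the snippet compiles on its own (host-side, C++17):

// dfor(2, 3) behaves like two nested for loops over 0..1 and 0..2.
#include <iostream>

inline constexpr auto dfor()
{
    return [](auto f) { f(); };
}

template <class T, class... Ts>
constexpr auto dfor(T x, Ts... xs)
{
    return [=](auto f) {
        for(T i = 0; i < x; i++)
        {
            dfor(xs...)([&](Ts... is) { f(i, is...); });
        }
    };
}

int main()
{
    // Visits (0,0) (0,1) (0,2) (1,0) (1,1) (1,2).
    dfor(2, 3)([](int i, int j) { std::cout << i << ',' << j << ' '; });
    std::cout << '\n';
}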
#ifndef MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP
#define MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/dfor.hpp>
#include <migraphx/kernels/basic_ops.hpp>
#include <args.hpp>
namespace migraphx {
struct max_pool
{
MIGRAPHX_DEVICE_CONSTEXPR auto init() { return lowest(); }
template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y)
{
return max(x, y);
}
template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T final(T x, std::size_t)
{
return (x);
}
};
struct avg_pool
{
MIGRAPHX_DEVICE_CONSTEXPR auto init() { return 0.0; }
template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y)
{
return x + y;
}
template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T final(T x, std::size_t y)
{
return (y == 0) ? 0.0 : (x / y);
}
};
template <class T, class Op>
MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data,
const array<std::size_t, 2>& dims,
array<float, 2> xy,
Op pooling)
{
array<int, 2> low{};
array<int, 2> high{};
for(std::size_t ii = 0; ii < xy.size(); ++ii)
{
if(xy[ii] < -1.0f or xy[ii] > dims[ii])
{
return 0;
}
xy[ii] = max(xy[ii], 0.0f);
low[ii] = xy[ii];
high[ii] = low[ii] + 1;
if(low[ii] >= dims[ii] - 1)
{
xy[ii] = high[ii] = low[ii] = dims[ii] - 1;
}
}
array<std::size_t, 4> locs = {low[0] * dims[1] + low[1],
low[0] * dims[1] + high[1],
high[0] * dims[1] + low[1],
high[0] * dims[1] + high[1]};
float ly = xy[0] - low[0];
float lx = xy[1] - low[1];
float hy = 1.0f - ly;
float hx = 1.0f - lx;
array<T, 4> ws = {hy * hx, hy * lx, ly * hx, ly * lx};
auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]);
auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]);
return pooling(v01, v23);
}
template <class T, class Op>
MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
const array<float, 2>& roi_starts,
const array<float, 2>& bin_size,
const array<int, 2>& idx,
const array<std::size_t, 2>& bin_grid_size,
const array<std::size_t, 2>& dims,
float roi_offset,
Op op)
{
T output_val = op.init();
const int64_t count = bin_grid_size[0] * bin_grid_size[1];
dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) {
array<std::size_t, 2> id = {iy, ix};
array<float, 2> locs =
roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset;
auto val = bilinear_interpolate(data, dims, locs, op);
output_val = op(output_val, val);
});
return op.final(output_val, count);
}
template <class T, class U, class V, class W>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t)
{
const float roi_offset = ROIS_OFFSET;
const bool is_avg_pooling = IS_AVG_POOLING;
const int64_t sampling_ratio = SAMPLING_RATIO;
const float spatial_scale = SPATIAL_SCALE;
auto index = make_index();
const auto* x = x_t.data();
const auto* rois = rois_t.data();
const auto* ind = ind_t.data();
auto* out_ptr = y_t.data();
// input shape
auto x_lens = x_t.get_shape().lens;
auto channel_num = x_lens[1];
// input height and width; in every 2-element array here, the first entry
// is the height and the second is the width
array<std::size_t, 2> in_dims = {x_lens[2], x_lens[3]};
const auto stride = index.nglobal();
auto out_s = y_t.get_shape();
auto roi_column_num = rois_t.get_shape().lens[1];
// output height and width, using the same (height, width) ordering
const auto& out_lens = out_s.lens;
array<std::size_t, 2> out_dims = {out_lens[2], out_lens[3]};
for(index_int i = index.global; i < out_s.elements(); i += stride)
{
auto idx = out_s.multi(i);
int n = idx[0];
int c = idx[1];
int ph = idx[2];
int pw = idx[3];
const auto* offset_rois = rois + (n * roi_column_num);
const int batch_ind = ind[n];
array<float, 2> roi_starts = {offset_rois[1] * spatial_scale,
offset_rois[0] * spatial_scale};
array<float, 2> roi_ends = {offset_rois[3] * spatial_scale, offset_rois[2] * spatial_scale};
array<float, 2> roi_size{};
array<float, 2> bin_size{};
array<std::size_t, 2> bin_grid_size{};
for(std::size_t ii = 0; ii < roi_size.size(); ++ii)
{
roi_size[ii] = roi_ends[ii] - roi_starts[ii];
roi_size[ii] = max(roi_size[ii], 1.0f);
bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] =
(sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
}
const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
if constexpr(is_avg_pooling)
{
out_ptr[i] = calc_pooling(offset_x,
roi_starts,
bin_size,
{ph, pw},
bin_grid_size,
in_dims,
roi_offset,
avg_pool{});
}
else
{
out_ptr[i] = calc_pooling(offset_x,
roi_starts,
bin_size,
{ph, pw},
bin_grid_size,
in_dims,
roi_offset,
max_pool{});
}
}
}
} // namespace migraphx
#endif
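bilinear_interpolate above blends the four grid cells around a fractional (y, x) sample with weights hy*hx, hy*lx, ly*hx and ly*lx before pooling. A simplified host-side sketch of that weighting (boundary handling and the pooling hook are omitted; bilinear is a hypothetical helper, not the kernel's function):

// Plain bilinear sample of a row-major h x w grid at fractional (y, x).
#include <cstddef>
#include <iostream>

float bilinear(const float* data, std::size_t h, std::size_t w, float y, float x)
{
    int y0 = static_cast<int>(y), x0 = static_cast<int>(x);
    int y1 = y0 + 1 < static_cast<int>(h) ? y0 + 1 : y0;
    int x1 = x0 + 1 < static_cast<int>(w) ? x0 + 1 : x0;
    float ly = y - y0, lx = x - x0, hy = 1.0f - ly, hx = 1.0f - lx;
    return hy * hx * data[y0 * w + x0] + hy * lx * data[y0 * w + x1] +
           ly * hx * data[y1 * w + x0] + ly * lx * data[y1 * w + x1];
}

int main()
{
    // 2x2 grid; sampling at the centre averages all four values.
    float grid[] = {0.f, 1.f, 2.f, 3.f};
    std::cout << bilinear(grid, 2, 2, 0.5f, 0.5f) << '\n'; // 1.5
}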
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPE_TRAITS_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPE_TRAITS_HPP
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/integral_constant.hpp>
namespace migraphx {
template <bool B, class T = void>
struct enable_if
{
};
template <class T>
struct enable_if<true, T>
{
using type = T;
};
template <bool B, class T = void>
using enable_if_t = typename enable_if<B, T>::type;
template <class From, class To>
struct is_convertible : bool_constant<__is_convertible(From, To)>
{
};
#define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>
} // namespace migraphx
#endif
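MIGRAPHX_REQUIRES above is a hand-rolled enable_if constraint, needed because the standard type-traits header is not available inside the device kernels. A host-side sketch of the same SFINAE pattern built on the standard library (REQUIRES and twice are made-up names for this sketch):

// Constrain a template with enable_if supplied as a defaulted template parameter.
#include <iostream>
#include <type_traits>

#define REQUIRES(...) class = std::enable_if_t<(__VA_ARGS__)>

// Only instantiable for integral T; twice(1.5) would fail to compile.
template <class T, REQUIRES(std::is_integral<T>{})>
T twice(T x)
{
    return x + x;
}

int main() { std::cout << twice(21) << "\n"; } // prints 42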
@@ -20,6 +20,7 @@
#include <migraphx/gpu/abs.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/compile_roialign.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
@@ -165,6 +166,7 @@ struct miopen_apply
add_extend_op("logsoftmax");
add_extend_op("lrn");
add_extend_op("multinomial");
add_extend_op("nonzero");
add_extend_op("pad");
add_extend_op("pooling");
add_extend_op("prefix_scan_sum");
@@ -181,15 +183,17 @@
add_extend_op("softmax");
add_extend_op("topk");
-add_gemm_op<op::dot>("dot");
-add_gemm_op<op::quant_dot>("quant_dot");
+add_batch_norm_inference_op();
add_convolution_op();
add_deconvolution_op();
-add_quant_convolution_op();
-add_batch_norm_inference_op();
-add_neg_op();
+add_gemm_op<op::dot>("dot");
+add_gemm_op<op::quant_dot>("quant_dot");
add_if_op();
add_loop_op();
+add_neg_op();
+add_nms_op();
+add_quant_convolution_op();
+add_roialign();
}
void copy_params()
@@ -304,17 +308,14 @@
});
}
-template <class Op>
-void add_gemm_op(std::string name)
+template <typename Op>
+void add_gemm_op(const std::string& name)
{
apply_map.emplace(name, [=](instruction_ref ins) {
-auto&& op = any_cast<Op>(ins->get_operator());
-auto beta = op.beta;
std::vector<instruction_ref> refs = ins->inputs();
if(refs.size() == 2)
{
auto output = insert_allocation(ins, ins->get_shape());
-beta = 0;
refs.push_back(output);
}
else
@@ -333,9 +334,8 @@ struct miopen_apply
refs.push_back(refs.back());
}
}
return mod->replace_instruction(
-ins, rocblas_gemm<Op>{Op{op.alpha, beta}, int8_x4_format}, refs);
+ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format}, refs);
});
}
@@ -472,6 +472,22 @@ struct miopen_apply
});
}
void add_roialign()
{
apply_map.emplace("roialign", [=](instruction_ref ins) {
auto s = ins->get_shape();
auto op_val = ins->get_operator().to_value();
auto output = insert_allocation(ins, s);
auto args = ins->inputs();
args.push_back(output);
auto io_shapes = to_shapes(args);
auto co = compile_roialign(get_context(), io_shapes, op_val);
return mod->replace_instruction(ins, co, args);
});
}
// replace the loop operator with gpu_loop operator
void add_loop_op()
{
@@ -509,6 +525,26 @@
ins, make_op("gpu::loop", ins->get_operator().to_value()), inputs, mod_args);
});
}
void add_nms_op()
{
apply_map.emplace("nonmaxsuppression", [=](instruction_ref ins) {
auto s = ins->get_shape();
auto output = insert_allocation(ins, s);
std::vector<instruction_ref> cpu_inputs;
auto inputs = ins->inputs();
std::transform(
inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) {
return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in);
});
cpu_inputs.front() =
mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs);
auto cpu_out = mod->insert_instruction(ins, ins->get_operator(), cpu_inputs);
auto gpu_out =
mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output);
return mod->replace_instruction(ins, gpu_out);
});
}
};
void lowering::apply(module& m) const { miopen_apply{&m, this}.apply(); }
...
#include <migraphx/gpu/nonzero.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/nonzero.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_nonzero::compute_shape(std::vector<shape> inputs) const
{
return op.compute_shape({inputs.front()});
}
argument hip_nonzero::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
return device::nonzero(ctx.get_stream().get(), args.back(), args.front());
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -2,7 +2,6 @@
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/check_context.hpp>
#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/decompose.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/eliminate_concat.hpp>
@@ -17,7 +16,6 @@
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
-#include <migraphx/remap.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
@@ -59,7 +57,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
return
{
normalize_ops{},
-decompose{},
dead_code_elimination{},
simplify_qdq{},
rewrite_quantization{},
...
@@ -518,42 +518,12 @@ struct ref_gemm
return migraphx::reflect(self.op, f);
}
std::string name() const { return "ref::dot"; }
-shape compute_shape(const std::vector<shape>& inputs) const
-{
-if(inputs.size() == 3)
-{
-auto c_shape = inputs.at(2);
-check_shapes{{c_shape}, *this}.not_broadcasted();
-}
-return op.compute_shape(inputs);
-}
+shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
-// 3 inputs, it is alpha * A * B + beta * C, then
-// A and B are matrices, and C is of the same shape as A * B
-if(args.size() == 3)
-{
-// no need to consider the value of args[2]
-if(op.beta == 0.0f)
-{
-result.visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
-}
-else
-{
-visit_all(result, args[2])([&](auto output, auto input) {
-std::copy(input.begin(), input.end(), output.begin());
-});
-}
-migemm(result, args[0], args[1], op.alpha, op.beta);
-return result;
-}
-// 2 input arguments
-migemm(result, args[0], args[1], op.alpha, 0.0f);
+migemm(result, args[0], args[1], 1.0f, 0.0f);
return result;
}
@@ -571,22 +541,11 @@ struct ref_quant_gemm
}
std::string name() const { return "ref::quant_dot"; }
-shape compute_shape(const std::vector<shape>& inputs) const
-{
-if(inputs.size() == 3)
-{
-auto c_shape = inputs.at(2);
-check_shapes{{c_shape}, *this}.not_broadcasted();
-}
-return op.compute_shape(inputs);
-}
+shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
-// 3 inputs, it is alpha * A * B + beta * C, then
-// A and B are matrices, and C is of the same shape to A * B
// first, convert the args[0] and args[1] from int8_t to int32_t
argument arg_0{{shape::int32_type, {args.at(0).get_shape().lens()}}};
argument arg_1{{shape::int32_type, {args.at(1).get_shape().lens()}}};
@@ -600,27 +559,7 @@
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
-if(args.size() == 3)
-{
-// no need to consider the value of args[2]
-if(op.beta == 0)
-{
-result.visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
-}
-else
-{
-visit_all(result, args[2])([&](auto output, auto input) {
-std::copy(input.begin(), input.end(), output.begin());
-});
-}
-migemm(result, arg_0, arg_1, op.alpha, op.beta);
-return result;
-}
-// 2 input arguments
-migemm(result, arg_0, arg_1, op.alpha, int32_t{0});
+migemm(result, arg_0, arg_1, int32_t{1}, int32_t{0});
return result;
}
...
@@ -45,7 +45,7 @@ TEST_CASE(if_pl_test)
auto ys = param_shapes["y"];
std::vector<float> yd(ys.bytes() / sizeof(float), 2.0);
pp.add("y", migraphx::argument(ys, yd.data()));
-char ccond = static_cast<char>(cond);
+char ccond = cond;
pp.add("cond", migraphx::argument(param_shapes["cond"], &ccond));
auto outputs = p.eval(pp);
...
@@ -8,16 +8,22 @@ TEST_CASE(add_op)
EXPECT(add_op.name() == "add");
}
-TEST_CASE(reduce_mean)
+TEST_CASE(reduce_mean_without_quotes)
{
auto rm = migraphx::operation("reduce_mean", "{axes : [1, 2, 3, 4]}");
EXPECT(rm.name() == "reduce_mean");
}
-TEST_CASE(reduce_mean1)
+TEST_CASE(reduce_mean)
{
auto rm = migraphx::operation("reduce_mean", "{\"axes\" : [1, 2, 3, 4]}");
EXPECT(rm.name() == "reduce_mean");
}
TEST_CASE(reduce_mean_with_format)
{
auto rm = migraphx::operation("reduce_mean", "{axes : [%i, %i, %i, %i]}", 1, 2, 3, 4);
EXPECT(rm.name() == "reduce_mean");
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }