Unverified commit 0879b5f1, authored by Shucai Xiao and committed by GitHub

Nonzero op extension (#870)



This PR adds the nonzero operator with a static output shape: for an input of rank r with n elements, the output is an int64 tensor of shape {r, n} holding the multi-indices of the nonzero elements, with unused trailing columns zero-filled. It includes reference (CPU) and GPU implementations, and the ONNX parser now accepts NonZero over non-constant inputs.
Co-authored-by: Paul Fultz II <pfultz2@yahoo.com>
Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent 21193e87
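For context, ONNX NonZero returns the multi-indices of every nonzero element of its input. With a static output shape, as implemented in this PR, the result always has shape {rank, num_elements} and unused columns are zero-filled. Below is a minimal standalone sketch of these semantics in plain C++, independent of the MIGraphX API (all names are local to the example):

#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

// Reference semantics of nonzero with a static output shape:
// input {2, 2} -> output {rank = 2, num_elements = 4}, zero-padded.
int main()
{
    std::array<float, 4> data = {1.0f, 0.0f, 1.0f, 1.0f}; // row-major {2, 2}
    std::size_t rank = 2, elems = 4;
    std::vector<std::int64_t> out(rank * elems, 0); // unused columns stay zero
    std::size_t col = 0;
    for(std::size_t i = 0; i < 2; ++i)
        for(std::size_t j = 0; j < 2; ++j)
            if(data[i * 2 + j] != 0.0f)
            {
                out[0 * elems + col] = i; // row 0: first coordinate
                out[1 * elems + col] = j; // row 1: second coordinate
                ++col;
            }
    // nonzeros at (0,0), (1,0), (1,1), so
    // out = {0, 1, 1, 0,   <- i coordinates, last column is padding
    //        0, 0, 1, 0}   <- j coordinates
    for(auto v : out)
        std::cout << v << ' ';
    std::cout << '\n';
}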
......@@ -130,6 +130,7 @@ register_migraphx_ops(
multibroadcast
multinomial
neg
nonzero
outline
pad
pooling
......
#ifndef MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/par_for.hpp>
#include <algorithm>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct nonzero
{
    std::string name() const { return "nonzero"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1).standard();
        auto elem_num = inputs[0].elements();
        auto dim_num  = inputs[0].lens().size();

        // static output shape: one row of coordinates per input dimension and
        // one column per input element; unused trailing columns stay zero
        std::vector<std::size_t> out_lens = {dim_num, elem_num};

        return {shape::int64_type, out_lens};
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        // collect the multi-index of every nonzero element in row-major order
        std::vector<std::vector<std::size_t>> vec_idx;
        auto s = args.front().get_shape();
        args.front().visit([&](auto v) {
            shape_for_each(s, [&](auto idx) {
                if(not float_equal(v[s.index(idx)], 0))
                {
                    vec_idx.push_back(idx);
                }
            });
        });

        // zero-fill the output, then scatter the collected indices: column i
        // of the output holds the multi-index of the i-th nonzero element
        argument result{output_shape};
        result.visit([&](auto output) {
            std::fill(output.begin(), output.end(), 0);
            par_for(vec_idx.size(), [&](auto i) {
                for(std::size_t j = 0; j < vec_idx.front().size(); ++j)
                {
                    output[output_shape.index({j, i})] = vec_idx[i][j];
                }
            });
        });

        return result;
    }
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -57,6 +57,7 @@
#include <migraphx/op/mul.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/op/neg.hpp>
#include <migraphx/op/nonzero.hpp>
#include <migraphx/op/outline.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
......
......@@ -9,7 +9,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
template <class T>
-std::vector<std::size_t> nonzero_indices(const std::vector<T>& data)
+static std::vector<std::size_t> nonzero_indices(const std::vector<T>& data)
{
std::vector<std::size_t> indices;
for(std::size_t i = 0; i < data.size(); ++i)
......@@ -31,8 +31,12 @@ struct parse_nonzero : op_parser<parse_nonzero>
std::vector<instruction_ref> args) const
{
migraphx::argument data_arg = args.back()->eval();
-check_arg_empty(data_arg, "PARSE_NONZERO: cannot support non-constant input!");
+if(data_arg.empty())
+{
+    // non-constant input: fall back to the runtime nonzero op
+    return info.add_instruction(make_op("nonzero"), args);
+}
+else
+{
std::vector<std::size_t> indices;
data_arg.visit([&](auto val) {
    using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
......@@ -56,6 +60,7 @@ struct parse_nonzero : op_parser<parse_nonzero>
return info.add_literal(literal(out_s, out_data));
}
}
};
} // namespace onnx
......
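With this change, a NonZero whose input is not compile-time constant parses to a runtime nonzero instruction instead of raising the PARSE_NONZERO error. A minimal sketch using the public C++ API, assuming the nonzero_dynamic_test.onnx model added below is on the current path:

#include <iostream>
#include <migraphx/onnx.hpp>
#include <migraphx/program.hpp>

int main()
{
    // Before this PR the parser rejected this model with
    // "PARSE_NONZERO: cannot support non-constant input!"
    auto p = migraphx::parse_onnx("nonzero_dynamic_test.onnx");
    std::cout << p << std::endl; // the printout now contains a nonzero instruction
}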
......@@ -60,6 +60,7 @@ add_library(migraphx_device
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
......@@ -145,6 +146,7 @@ add_library(migraphx_gpu
leaky_relu.cpp
mlir_conv.cpp
multinomial.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
pad.cpp
......@@ -202,6 +204,7 @@ register_migraphx_gpu_ops(hip_
min
mul
multinomial
nonzero
pad
pow
prelu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#include <migraphx/requires.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <cmath>
#include <limits>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
template <class... Ts>
using common_type = typename std::common_type<Ts...>::type;

// floating point: equal when both values are finite and y lies between
// x's immediate floating-point neighbors, i.e. within one ULP
template <class T, MIGRAPHX_REQUIRES(is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
    return std::isfinite(x) and std::isfinite(y) and
           std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
           std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}

// non-floating-point types: exact comparison
template <class T, MIGRAPHX_REQUIRES(not is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
    return x == y;
}

template <class T, class U>
__device__ bool float_equal(T x, U y)
{
    return float_equal_device<common_type<T, U>>(x, y);
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
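The floating-point overload treats two finite values as equal when y is bracketed by x's immediate neighbors, one ULP in each direction. A host-side sketch of the same test (hypothetical float_equal_host name, no HIP required):

#include <cassert>
#include <cmath>
#include <limits>

// one-ULP tolerant comparison, same bracketing as float_equal_device:
// y must lie in [nextafter(x, lowest), nextafter(x, max)]
template <class T>
bool float_equal_host(T x, T y)
{
    return std::isfinite(x) and std::isfinite(y) and
           std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
           std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}

int main()
{
    float x = 0.1f;
    // the immediate neighbor of x still compares equal; 0 does not
    assert(float_equal_host(x, std::nextafter(x, 1.0f)));
    assert(not float_equal_host(x, 0.0f));
}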
......@@ -129,6 +129,21 @@ __device__ __host__ T to_hip_type(T x)
// HIP doesn't support __fp16
inline __device__ __host__ float to_hip_type(gpu_half x) { return x; }
#define MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(trait, T) \
    template <class X>                             \
    struct trait : std::trait<X>                   \
    {                                              \
    };                                             \
                                                   \
    template <>                                    \
    struct trait<T> : std::true_type               \
    {                                              \
    };

MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, __fp16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, __fp16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, __fp16)
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
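For reference, each MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR invocation wraps the std trait and specializes it for __fp16. The first invocation roughly expands to the following, which is what lets float_equal_device pick the ULP-based overload for half-precision values:

// approximate expansion of MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, __fp16)
template <class X>
struct is_floating_point : std::is_floating_point<X>
{
};

template <>
struct is_floating_point<__fp16> : std::true_type
{
};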
#include <migraphx/gpu/device/nonzero.hpp>
#include <migraphx/gpu/device/float_equal.hpp>
#include <migraphx/gpu/device/scan.hpp>
#include <migraphx/gpu/device/reduce_ops.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data)
{
    auto s            = arg_data.get_shape();
    auto elem_num     = s.elements();
    auto out_elem_num = result.get_shape().elements();

    // A prefix sum over the nonzero flags assigns each nonzero element its
    // column in the output. Only one block can be used, since the scan must
    // run over the whole input as a single prefix sum.
    const index_int block_size = 256;
    hip_visit_all(arg_data, s)([&](auto input, auto si) {
        const auto* in_ptr = device_cast(input.data());
        auto* ptr          = result.cast<int64_t>();
        gs_launch(stream, block_size, block_size)([=](auto, auto idx) __device__ {
            // zero-fill the whole output first
            idx.local_stride(out_elem_num, [&](auto j) { ptr[j] = 0; });
            block_scan<block_size>(idx,
                                   sum{},
                                   0,
                                   elem_num,
                                   [&](auto j) { return (float_equal(in_ptr[j], 0)) ? 0 : 1; },
                                   [&](auto j, auto x) {
                                       // x is the inclusive scan value at j, so a
                                       // nonzero element lands in output column x - 1
                                       auto out_loc = x - 1;
                                       if(float_equal(in_ptr[j], 0))
                                           return;
                                       auto index = si.multi(j);
                                       for(size_t k = 0; k < index.size(); ++k)
                                       {
                                           ptr[k * elem_num + out_loc] = index[k];
                                       }
                                   });
        });
    });

    return result;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
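The kernel assigns output columns through the scan: reading the 0/1 nonzero flags with an inclusive prefix sum, a nonzero element at position j lands in column scan(j) - 1. A host-side sketch of that indexing in plain C++ (hypothetical nonzero_columns helper, not part of the PR):

#include <cassert>
#include <cstdint>
#include <vector>

// inclusive prefix sum over nonzero flags; a nonzero element at j
// is written to output column scan(j) - 1, matching the kernel above
std::vector<std::int64_t> nonzero_columns(const std::vector<float>& in)
{
    std::vector<std::int64_t> cols(in.size(), -1); // -1 marks zero elements
    std::int64_t running = 0;
    for(std::size_t j = 0; j < in.size(); ++j)
    {
        running += (in[j] == 0.0f) ? 0 : 1; // the scanned flag
        if(in[j] != 0.0f)
            cols[j] = running - 1;
    }
    return cols;
}

int main()
{
    // flags {1, 0, 1, 1} scan to {1, 1, 2, 3}: the nonzeros go to columns 0, 1, 2
    assert(nonzero_columns({5.f, 0.f, 3.f, 7.f}) ==
           (std::vector<std::int64_t>{0, -1, 1, 2}));
}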
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP
#define MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/nonzero.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_nonzero
{
    op::nonzero op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "gpu::nonzero"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    // the op writes in place into the preallocated output passed as the last argument
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -165,6 +165,7 @@ struct miopen_apply
add_extend_op("logsoftmax");
add_extend_op("lrn");
add_extend_op("multinomial");
add_extend_op("nonzero");
add_extend_op("pad");
add_extend_op("pooling");
add_extend_op("prefix_scan_sum");
......@@ -181,15 +182,15 @@ struct miopen_apply
add_extend_op("softmax");
add_extend_op("topk");
add_gemm_op<op::dot>("dot");
add_gemm_op<op::quant_dot>("quant_dot");
add_batch_norm_inference_op();
add_convolution_op();
add_deconvolution_op();
add_quant_convolution_op();
add_batch_norm_inference_op();
add_neg_op();
add_gemm_op<op::dot>("dot");
add_gemm_op<op::quant_dot>("quant_dot");
add_if_op();
add_loop_op();
add_neg_op();
add_quant_convolution_op();
}
void copy_params()
......
#include <migraphx/gpu/nonzero.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/nonzero.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_nonzero::compute_shape(std::vector<shape> inputs) const
{
    return op.compute_shape({inputs.front()});
}

argument hip_nonzero::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    // args.back() is the preallocated output buffer (see output_alias)
    return device::nonzero(ctx.get_stream().get(), args.back(), args.front());
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -2748,6 +2748,18 @@ def no_pad_test():
return ([node], [x], [y])
@onnx_test
def nonzero_dynamic_test():
    x = helper.make_tensor_value_info('data', TensorProto.BOOL, [2, 2])
    y = helper.make_tensor_value_info('indices', TensorProto.INT64, [2, 3])

    node = onnx.helper.make_node('NonZero',
                                 inputs=['data'],
                                 outputs=['indices'])

    return ([node], [x], [y])
@onnx_test
def nonzero_test():
    data1 = np.array([[1., 0.], [1., 1.]])
......
[binary file nonzero_dynamic_test.onnx: ONNX protobuf with a single NonZero node mapping bool input data of shape [2, 2] to int64 output indices of shape [2, 3]]
......@@ -2404,6 +2404,19 @@ TEST_CASE(neg_test)
EXPECT(p == prog);
}
TEST_CASE(nonzero_dynamic_test)
{
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape s{migraphx::shape::bool_type, {2, 2}};
    auto data = mm->add_parameter("data", s);
    auto r    = mm->add_instruction(migraphx::make_op("nonzero"), data);
    mm->add_return({r});

    auto prog = migraphx::parse_onnx("nonzero_dynamic_test.onnx");
    EXPECT(p == prog);
}
TEST_CASE(nonzero_test)
{
migraphx::program p;
......
......@@ -308,6 +308,25 @@ TEST_CASE(lessorequal_test)
EXPECT(migraphx::verify_range(result_vector, gold));
}
TEST_CASE(nonzero_test)
{
    migraphx::program p = migraphx::parse_onnx("nonzero_dynamic_test.onnx");
    p.compile(migraphx::ref::target{});

    migraphx::shape s{migraphx::shape::bool_type, {2, 2}};
    std::vector<char> data = {1, 1, 1, 0};
    migraphx::parameter_map pp;
    pp["data"] = migraphx::argument(s, data.data());

    auto result = p.eval(pp).back();
    std::vector<float> result_vector;
    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
    // nonzeros sit at (0,0), (0,1), (1,0); the two output rows hold the row and
    // column coordinates, zero-padded to the static width of 4
    std::vector<float> gold = {0, 0, 1, 0, 0, 1, 0, 0};
    EXPECT(migraphx::verify_range(result_vector, gold));
}
TEST_CASE(resize_downsample_f_test)
{
migraphx::program p = migraphx::parse_onnx("resize_downsample_f_test.onnx");
......
......@@ -53,6 +53,22 @@ def test_neg_int64():
print(r)
def test_nonzero():
    p = migraphx.parse_onnx("nonzero_dynamic_test.onnx")
    print(p)
    print("Compiling ...")
    p.compile(migraphx.get_target("gpu"))
    print(p)
    params = {}
    shapes = p.get_parameter_shapes()
    params["data"] = np.array([1, 1, 0, 1]).reshape(
        shapes["data"].lens()).astype(np.bool)

    r = p.run(params)
    print(r)
def test_fp16_imagescaler():
p = migraphx.parse_onnx("imagescaler_half_test.onnx")
print(p)
......@@ -98,3 +114,4 @@ test_sub_uint64()
test_neg_int64()
test_fp16_imagescaler()
test_if_pl()
test_nonzero()
......@@ -2756,6 +2756,26 @@ TEST_CASE(neg_test)
EXPECT(migraphx::verify_range(result_vector, gold));
}
TEST_CASE(nonzero_test)
{
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape s{migraphx::shape::float_type, {2, 2, 3}};
    std::vector<float> data = {
        1.0f, 1.3f, 0.0f, -1.2f, 0.0f, -100.f, 200.f, 0.0f, 0.1f, 0.2f, 0.0f, 0.5f};
    auto input = mm->add_literal(migraphx::literal(s, data));
    auto ret   = mm->add_instruction(migraphx::make_op("nonzero"), input);
    mm->add_return({ret});
    p.compile(migraphx::ref::target{});
    auto result = p.eval({}).back();
    std::cout << "result = " << result << std::endl;
    std::vector<int64_t> result_vector;
    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
    // 8 of the 12 elements are nonzero; each output row holds one coordinate of
    // their multi-indices, zero-padded to the static width of 12
    std::vector<int64_t> gold = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
                                 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 2, 0, 2, 0, 0, 0, 0};
    EXPECT(migraphx::verify_range(result_vector, gold));
}
TEST_CASE(not_test)
{
// int32
......
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
struct test_nonzero : verify_program<test_nonzero>
{
    migraphx::program create_program() const
    {
        migraphx::program p;
        auto* mm = p.get_main_module();
        migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 5}};
        auto x = mm->add_parameter("data", s);
        auto r = mm->add_instruction(migraphx::make_op("nonzero"), x);
        mm->add_return({r});
        return p;
    }
};
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
struct test_nonzero_half : verify_program<test_nonzero_half>
{
    migraphx::program create_program() const
    {
        migraphx::program p;
        auto* mm = p.get_main_module();
        migraphx::shape s{migraphx::shape::half_type, {3, 4, 3, 5}};
        auto x = mm->add_parameter("data", s);
        auto r = mm->add_instruction(migraphx::make_op("nonzero"), x);
        mm->add_return({r});
        return p;
    }
};