Commit 86dca4c9 authored by Khalique's avatar Khalique
Browse files

manual merge

parents 9b46b9fd 73b16700
......@@ -32,7 +32,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()
endif()
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("--cuda-host-only -x hip" HAS_HIP)
if(HAS_HIP)
message(STATUS "Enable miopen backend")
set(MIGRAPHX_ENABLE_GPU On CACHE BOOL "")
else()
......@@ -81,6 +83,7 @@ rocm_enable_clang_tidy(
-modernize-use-override
-modernize-pass-by-value
-modernize-use-default-member-init
-modernize-use-trailing-return-type
-modernize-use-transparent-functors
-performance-type-promotion-in-math-fn
-readability-braces-around-statements
......
......@@ -20,6 +20,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
clang-format-5.0 \
clang-tidy-5.0 \
cmake \
comgr \
curl \
doxygen \
g++-7 \
......@@ -32,14 +33,16 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
libncurses5-dev \
libnuma-dev \
libpthread-stubs0-dev \
libssl-dev \
python \
python-dev \
python-pip \
rocm-device-libs \
rocm-opencl \
rocm-opencl-dev \
rocminfo \
software-properties-common \
wget && \
wget \
zlib1g-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
......@@ -50,7 +53,7 @@ RUN pip install cget
RUN pip install https://github.com/pfultz2/rclone/archive/master.tar.gz
# Install hcc
RUN rclone -b roc-2.3.x -c fd93baed7dcc4fe8019b5fdc90213bfe7c298245 https://github.com/RadeonOpenCompute/hcc.git /hcc
RUN rclone -b roc-2.6.x -c 0f4c96b7851af2663a7f3ac16ecfb76c7c78a5bf https://github.com/RadeonOpenCompute/hcc.git /hcc
RUN cget -p $PREFIX install hcc,/hcc
# Use hcc
......
#ifndef MIGRAPHX_GUARD_OPERATORS_RSQRT_HPP
#define MIGRAPHX_GUARD_OPERATORS_RSQRT_HPP
#include <migraphx/op/unary.hpp>
#include <cmath>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct rsqrt : unary<rsqrt>
{
auto apply() const
{
return [](auto x) { return 1 / std::sqrt(x); };
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -53,6 +53,7 @@
#include <migraphx/op/rnn.hpp>
#include <migraphx/op/rnn_last_cell_output.hpp>
#include <migraphx/op/rnn_last_output.hpp>
#include <migraphx/op/rsqrt.hpp>
#include <migraphx/op/scalar.hpp>
#include <migraphx/op/sigmoid.hpp>
#include <migraphx/op/sinh.hpp>
......
......@@ -66,13 +66,15 @@ struct onnx_parser
add_variadic_op("Max", op::max{});
add_variadic_op("Min", op::min{});
add_mem_op("ArgMax", &onnx_parser::parse_argmax);
add_mem_op("ArgMin", &onnx_parser::parse_argmin);
add_mem_op("ArgMax", &onnx_parser::parse_arg_op<op::argmax>);
add_mem_op("ArgMin", &onnx_parser::parse_arg_op<op::argmin>);
add_mem_op("Cast", &onnx_parser::parse_cast);
add_mem_op("Clip", &onnx_parser::parse_clip);
add_mem_op("LRN", &onnx_parser::parse_lrn);
add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
add_mem_op("Elu", &onnx_parser::parse_elu);
add_mem_op("Expand", &onnx_parser::parse_expand);
add_mem_op("Constant", &onnx_parser::parse_constant);
add_mem_op("Conv", &onnx_parser::parse_conv);
add_mem_op("MaxPool", &onnx_parser::parse_pooling);
......@@ -84,8 +86,8 @@ struct onnx_parser
add_mem_op("Gemm", &onnx_parser::parse_gemm);
add_mem_op("MatMul", &onnx_parser::parse_matmul);
add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
add_mem_op("Softmax", &onnx_parser::parse_softmax);
add_mem_op("LogSoftmax", &onnx_parser::parse_logsoftmax);
add_mem_op("Softmax", &onnx_parser::parse_softmax<op::softmax>);
add_mem_op("LogSoftmax", &onnx_parser::parse_softmax<op::logsoftmax>);
add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
add_mem_op("Slice", &onnx_parser::parse_slice);
......@@ -93,6 +95,7 @@ struct onnx_parser
add_mem_op("Gather", &onnx_parser::parse_gather);
add_mem_op("Shape", &onnx_parser::parse_shape);
add_mem_op("ConstantFill", &onnx_parser::parse_constant_fill);
add_mem_op("ConstantOfShape", &onnx_parser::parse_constant_of_shape);
add_mem_op("Transpose", &onnx_parser::parse_transpose);
add_mem_op("RNN", &onnx_parser::parse_rnn);
add_mem_op("GRU", &onnx_parser::parse_gru);
......@@ -258,19 +261,10 @@ struct onnx_parser
return prog.add_instruction(op, std::move(args));
}
instruction_ref
parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
auto dims = args.front()->get_shape().lens();
auto r =
prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, args.front());
auto s = prog.add_instruction(op::softmax{}, r);
return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
}
instruction_ref parse_logsoftmax(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
template <class Op>
instruction_ref parse_softmax(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
int axis = 1;
if(contains(attributes, "axis"))
......@@ -278,37 +272,11 @@ struct onnx_parser
axis = parse_value(attributes.at("axis")).at<int>();
}
return prog.add_instruction(op::logsoftmax{axis}, std::move(args));
}
instruction_ref parse_argmax(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
int64_t axis = 0;
if(contains(attributes, "axis"))
{
axis = static_cast<int64_t>(parse_value(attributes.at("axis")).at<int>());
}
int keep_dims = 1;
if(contains(attributes, "keepdims"))
{
keep_dims = parse_value(attributes.at("keepdims")).at<int>();
}
if(keep_dims == 0)
{
auto ins = prog.add_instruction(op::argmax{axis}, std::move(args));
return prog.add_instruction(op::squeeze{{axis}}, ins);
}
else
{
return prog.add_instruction(op::argmax{axis}, std::move(args));
}
return prog.add_instruction(Op{axis}, std::move(args));
}
instruction_ref parse_argmin(const std::string&,
template <class Op>
instruction_ref parse_arg_op(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
......@@ -326,12 +294,12 @@ struct onnx_parser
if(keep_dims == 0)
{
auto ins = prog.add_instruction(op::argmin{axis}, std::move(args));
auto ins = prog.add_instruction(Op{axis}, std::move(args));
return prog.add_instruction(op::squeeze{{axis}}, ins);
}
else
{
return prog.add_instruction(op::argmin{axis}, std::move(args));
return prog.add_instruction(Op{axis}, std::move(args));
}
}
......@@ -464,10 +432,15 @@ struct onnx_parser
if(args.size() == 2)
{
auto s = args[1]->eval();
if(s.empty())
MIGRAPHX_THROW("Dynamic shape is not supported.");
check_arg_empty(s, "Reshape: dynamic shape is not supported");
s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
}
if(!args[0]->get_shape().standard())
{
args[0] = prog.add_instruction(op::contiguous{}, args[0]);
}
return prog.add_instruction(op, args[0]);
}
......@@ -544,7 +517,13 @@ struct onnx_parser
attribute_map attributes,
const std::vector<instruction_ref>&)
{
literal v = parse_value(attributes.at("value"));
literal v = parse_value(attributes.at("value"));
// return empty literal
if(v.get_shape().elements() == 0)
{
return prog.add_literal(literal{});
}
auto dim_size = attributes.at("value").t().dims_size();
// if dim_size is 0, it is a scalar
if(dim_size == 0)
......@@ -841,7 +820,7 @@ struct onnx_parser
{
dtype = parse_value(attributes.at("dtype")).at<int>();
}
migraphx::shape::type_t type = get_type(dtype);
shape::type_t type = get_type(dtype);
if(contains(attributes, "input_as_shape"))
{
......@@ -872,10 +851,7 @@ struct onnx_parser
}
migraphx::argument in = args[0]->eval();
if(in.empty())
{
MIGRAPHX_THROW("ConstantFill: cannot handle dynamic shape as input");
}
check_arg_empty(in, "ConstantFill: dynamic shape is not supported");
std::vector<std::size_t> dims;
in.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
......@@ -903,6 +879,73 @@ struct onnx_parser
}
}
instruction_ref parse_constant_of_shape(const std::string&,
attribute_map attributes,
std::vector<instruction_ref> args)
{
literal l_val{};
if(contains(attributes, "value"))
{
l_val = parse_value(attributes.at("value"));
if(l_val.get_shape().elements() != 1)
{
MIGRAPHX_THROW("ConstantOfShape: attribute value can contain only 1 elements!");
}
}
else
{
l_val = literal({shape::float_type, {1}, {0}}, {0.0f});
}
// input is empty, output is a scalar
auto type = l_val.get_shape().type();
if(args.empty())
{
MIGRAPHX_THROW("ConstantOfShape : must have 1 input!");
}
else
{
migraphx::shape s;
// empty input tensor, output is a scalar
if(args[0]->get_shape().elements() == 0)
{
s = migraphx::shape{type, {1}, {0}};
}
else
{
migraphx::argument in = args[0]->eval();
check_arg_empty(in, "ConstantOfShape: dynamic shape is not supported");
std::vector<std::size_t> dims;
in.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
s = migraphx::shape{type, dims};
}
literal l_out{};
l_val.visit([&](auto val) {
using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
// l_val contains only one element
std::vector<val_type> out_vec(s.elements(), *val.begin());
l_out = literal(s, out_vec);
});
return prog.add_literal(l_out);
}
}
instruction_ref
parse_expand(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
auto in_lens = args[0]->get_shape().lens();
migraphx::argument arg_s = args[1]->eval();
check_arg_empty(arg_s, "Expand: dynamic shape is not supported");
std::vector<std::size_t> dims;
arg_s.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
auto out_lens = compute_broadcasted_lens(in_lens, dims);
return prog.add_instruction(op::multibroadcast{out_lens}, args[0]);
}
std::vector<instruction_ref>
parse_rnn(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
......@@ -1325,6 +1368,19 @@ struct onnx_parser
}
}
instruction_ref
parse_cast(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
if(!contains(attributes, "to"))
{
MIGRAPHX_THROW("PARSE_CAST: missing to type attribute!");
}
int to_type = parse_value(attributes.at("to")).at<int>();
shape::type_t type = get_type(to_type);
return prog.add_instruction(op::convert{type}, std::move(args));
}
void parse_from(std::istream& is)
{
onnx::ModelProto model;
......@@ -1471,16 +1527,16 @@ struct onnx_parser
{
switch(attr.type())
{
case onnx::AttributeProto::UNDEFINED: return {};
case onnx::AttributeProto::FLOAT: return literal{attr.f()};
case onnx::AttributeProto::INT: return literal{attr.i()};
case onnx::AttributeProto::STRING: return {};
case onnx::AttributeProto::TENSOR: return parse_tensor(attr.t());
case onnx::AttributeProto::GRAPH: return {};
case onnx::AttributeProto::FLOATS: return from_repeated(shape::float_type, attr.floats());
case onnx::AttributeProto::INTS: return from_repeated(shape::int64_type, attr.ints());
case onnx::AttributeProto::STRINGS: return {};
case onnx::AttributeProto::TENSORS: return {};
case onnx::AttributeProto::UNDEFINED:
case onnx::AttributeProto::GRAPH:
case onnx::AttributeProto::STRING:
case onnx::AttributeProto::STRINGS:
case onnx::AttributeProto::TENSORS:
case onnx::AttributeProto::GRAPHS: return {};
}
MIGRAPHX_THROW("Invalid attribute type");
......@@ -1494,47 +1550,41 @@ struct onnx_parser
const std::string& s = t.raw_data();
switch(t.data_type())
{
case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
case onnx::TensorProto::FLOAT: return create_literal(shape::float_type, dims, s.data());
case onnx::TensorProto::UINT8: throw std::runtime_error("");
case onnx::TensorProto::INT8: return create_literal(shape::int32_type, dims, s.data());
case onnx::TensorProto::UINT16:
return create_literal(shape::int32_type, dims, s.data());
case onnx::TensorProto::INT16: return create_literal(shape::int32_type, dims, s.data());
case onnx::TensorProto::INT32: return create_literal(shape::int32_type, dims, s.data());
case onnx::TensorProto::INT64: return create_literal(shape::int64_type, dims, s.data());
case onnx::TensorProto::STRING: throw std::runtime_error("");
case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, s.data());
case onnx::TensorProto::FLOAT16:
return create_literal(shape::half_type, dims, s.data());
case onnx::TensorProto::DOUBLE:
return create_literal(shape::double_type, dims, s.data());
case onnx::TensorProto::UINT32: throw std::runtime_error("");
case onnx::TensorProto::UINT64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
case onnx::TensorProto::INT64: return create_literal(shape::int64_type, dims, s.data());
case onnx::TensorProto::INT8:
case onnx::TensorProto::UINT16:
case onnx::TensorProto::INT16:
case onnx::TensorProto::INT32:
case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, s.data());
case onnx::TensorProto::UINT8:
case onnx::TensorProto::STRING:
case onnx::TensorProto::UNDEFINED:
case onnx::TensorProto::UINT32:
case onnx::TensorProto::UINT64:
case onnx::TensorProto::COMPLEX64:
case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
}
MIGRAPHX_THROW("Invalid tensor type");
}
switch(t.data_type())
{
case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
case onnx::TensorProto::FLOAT:
return create_literal(shape::float_type, dims, t.float_data());
case onnx::TensorProto::UINT8: throw std::runtime_error("");
case onnx::TensorProto::INT8:
return create_literal(shape::int32_type, dims, t.int32_data());
case onnx::TensorProto::UINT16:
return create_literal(shape::int32_type, dims, t.int32_data());
case onnx::TensorProto::INT16:
return create_literal(shape::int32_type, dims, t.int32_data());
case onnx::TensorProto::INT32:
case onnx::TensorProto::BOOL:
return create_literal(shape::int32_type, dims, t.int32_data());
case onnx::TensorProto::INT64:
return create_literal(shape::int64_type, dims, t.int64_data());
case onnx::TensorProto::STRING: throw std::runtime_error("");
case onnx::TensorProto::BOOL:
return create_literal(shape::int32_type, dims, t.int32_data());
case onnx::TensorProto::DOUBLE:
return create_literal(shape::double_type, dims, t.double_data());
case onnx::TensorProto::FLOAT:
return create_literal(shape::float_type, dims, t.float_data());
case onnx::TensorProto::FLOAT16:
{
std::vector<uint16_t> data_uint16(t.int32_data().begin(), t.int32_data().end());
......@@ -1545,11 +1595,12 @@ struct onnx_parser
[](uint16_t raw_val) { return *reinterpret_cast<half*>(&raw_val); });
return create_literal(shape::half_type, dims, data_half);
}
case onnx::TensorProto::DOUBLE:
return create_literal(shape::double_type, dims, t.double_data());
case onnx::TensorProto::UINT32: throw std::runtime_error("");
case onnx::TensorProto::UINT64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
case onnx::TensorProto::UNDEFINED:
case onnx::TensorProto::UINT8:
case onnx::TensorProto::STRING:
case onnx::TensorProto::UINT32:
case onnx::TensorProto::UINT64:
case onnx::TensorProto::COMPLEX64:
case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
}
MIGRAPHX_THROW("Invalid tensor type");
......@@ -1577,28 +1628,23 @@ struct onnx_parser
shape::type_t shape_type{};
switch(t.tensor_type().elem_type())
{
case onnx::TensorProto::UNDEFINED:
break; // throw std::runtime_error("Unsupported type UNDEFINED");
case onnx::TensorProto::FLOAT: shape_type = shape::float_type; break;
case onnx::TensorProto::UINT8:
break; // throw std::runtime_error("Unsupported type UINT8");
case onnx::TensorProto::INT8: shape_type = shape::int8_type; break;
case onnx::TensorProto::UINT16: shape_type = shape::uint16_type; break;
case onnx::TensorProto::INT16: shape_type = shape::int16_type; break;
case onnx::TensorProto::INT32: shape_type = shape::int32_type; break;
case onnx::TensorProto::INT64: shape_type = shape::int64_type; break;
case onnx::TensorProto::STRING:
break; // throw std::runtime_error("Unsupported type STRING");
case onnx::TensorProto::BOOL:
break; // throw std::runtime_error("Unsupported type BOOL");
case onnx::TensorProto::FLOAT16: shape_type = shape::half_type; break;
case onnx::TensorProto::DOUBLE: shape_type = shape::double_type; break;
case onnx::TensorProto::UINT32: shape_type = shape::uint32_type; break;
case onnx::TensorProto::UINT64: shape_type = shape::uint64_type; break;
case onnx::TensorProto::UINT8:
case onnx::TensorProto::STRING:
case onnx::TensorProto::BOOL:
case onnx::TensorProto::UNDEFINED:
case onnx::TensorProto::COMPLEX64:
break; // throw std::runtime_error("Unsupported type COMPLEX64");
case onnx::TensorProto::COMPLEX128:
break; // throw std::runtime_error("Unsupported type COMPLEX128");
break; // throw std::runtime_error("Unsupported type");
}
std::vector<std::size_t> dims;
auto&& tensor_dims = t.tensor_type().shape().dim();
......@@ -1637,6 +1683,14 @@ struct onnx_parser
}
}
}
void check_arg_empty(const argument& arg, const std::string& msg)
{
if(arg.empty())
{
MIGRAPHX_THROW(msg);
}
}
};
program parse_onnx(const std::string& name)
......
......@@ -40,6 +40,7 @@ add_library(migraphx_device
device/div.cpp
device/clip.cpp
device/reduce_sum.cpp
device/rsqrt.cpp
device/sqrt.cpp
device/reduce_mean.cpp
device/pow.cpp
......
......@@ -25,7 +25,7 @@ argument gather(hipStream_t stream, argument result, argument arg1, argument arg
arg2.visit([&](auto indices) {
const auto* indices_ptr = device_cast(indices.data());
auto* output_ptr = device_cast(output.data());
gs_launch(stream, nelements)([=](auto i) {
gs_launch(stream, nelements, 256)([=](auto i) {
auto idx = out_comp.multi(i);
idx[axis_index] = indices_ptr[idx[axis_index]];
output_ptr[i] = input[idx];
......
......@@ -8,6 +8,7 @@ namespace device {
void reduce_sum(hipStream_t stream, const argument& result, const argument& arg)
{
reduce(stream, result, arg, sum{}, 0, id{}, id{});
}
......
#include <migraphx/gpu/device/rsqrt.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void rsqrt(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) __device__ { return ::rsqrt(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -167,10 +167,28 @@ rb_type<T>* to_rocblas_type(T* x)
rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_half&>(x); }
void miopen_gemm::batch_not_transposed(const std::vector<std::size_t>& strides) const
{
if(strides.size() <= 2)
return;
auto dim_0 = strides.size() - 2;
auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
return (i < j or i < matrix_size or j < matrix_size);
}) != batch.end())
{
MIGRAPHX_THROW("DOT: batch size {" + to_string_range(strides) + "} is transposed!");
}
}
shape miopen_gemm::compute_shape(const std::vector<shape>& inputs) const
{
std::vector<shape> input_shapes(inputs.begin(), inputs.begin() + inputs.size() - 1);
check_shapes{input_shapes}.not_broadcasted();
batch_not_transposed(inputs[0].strides());
batch_not_transposed(inputs[1].strides());
return op.compute_shape(input_shapes);
}
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_RSQRT_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_RSQRT_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void rsqrt(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -24,6 +24,7 @@ struct miopen_gemm
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
void batch_not_transposed(const std::vector<std::size_t>& strides) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
......
#ifndef MIGRAPHX_GUARD_RTGLIB_RSQRT_HPP
#define MIGRAPHX_GUARD_RTGLIB_RSQRT_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/rsqrt.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_rsqrt : unary_device<hip_rsqrt, device::rsqrt>
{
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -52,6 +52,7 @@
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/reduce_sum.hpp>
#include <migraphx/gpu/rsqrt.hpp>
#include <migraphx/gpu/sqrt.hpp>
#include <migraphx/gpu/reduce_mean.hpp>
#include <migraphx/gpu/pow.hpp>
......@@ -107,6 +108,7 @@ struct miopen_apply
add_generic_op<hip_div>("div");
add_generic_op<hip_max>("max");
add_generic_op<hip_min>("min");
add_generic_op<hip_rsqrt>("rsqrt");
add_generic_op<hip_pow>("pow");
add_generic_op<hip_sqdiff>("sqdiff");
......
......@@ -154,22 +154,27 @@ struct tf_parser
add_generic_op("Identity", op::identity{});
add_generic_op("Relu", op::relu{});
add_generic_op("Relu6", op::clip{6.0, 0.0});
add_generic_op("Rsqrt", op::rsqrt{});
add_generic_op("Tanh", op::tanh{});
add_generic_op("StopGradient", op::identity{});
add_binary_op("Add", op::add{});
add_binary_op("Mul", op::mul{});
add_binary_op("Pow", op::pow{});
add_binary_op("SquaredDifference", op::sqdiff{});
add_binary_op("Sub", op::sub{});
add_mem_op("AvgPool", &tf_parser::parse_pooling);
add_mem_op("BatchMatMul", &tf_parser::parse_matmul, false);
add_mem_op("BiasAdd", &tf_parser::parse_biasadd);
add_mem_op("Cast", &tf_parser::parse_cast, false);
add_mem_op("ConcatV2", &tf_parser::parse_concat, false);
add_mem_op("Const", &tf_parser::parse_constant);
add_mem_op("Conv2D", &tf_parser::parse_conv);
add_mem_op("DepthwiseConv2dNative", &tf_parser::parse_depthwiseconv);
add_mem_op("ExpandDims", &tf_parser::parse_expanddims, false);
add_mem_op("FusedBatchNorm", &tf_parser::parse_batchnorm);
add_mem_op("GatherV2", &tf_parser::parse_gather, false);
add_mem_op("MatMul", &tf_parser::parse_matmul, false);
add_mem_op("MaxPool", &tf_parser::parse_pooling);
add_mem_op("Mean", &tf_parser::parse_mean);
......@@ -177,7 +182,7 @@ struct tf_parser
add_mem_op("Pad", &tf_parser::parse_pad);
add_mem_op("Reshape", &tf_parser::parse_reshape, false);
add_mem_op("Slice", &tf_parser::parse_slice, false);
add_mem_op("Softmax", &tf_parser::parse_softmax);
add_mem_op("Softmax", &tf_parser::parse_softmax<op::softmax>);
add_mem_op("Squeeze", &tf_parser::parse_squeeze, false);
add_mem_op("StridedSlice", &tf_parser::parse_stridedslice);
add_mem_op("Transpose", &tf_parser::parse_transpose, false);
......@@ -306,6 +311,13 @@ struct tf_parser
return prog.add_instruction(op::add{}, args[0], l0);
}
instruction_ref
parse_cast(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
shape::type_t type = parse_type(attributes.at("DstT").type());
return prog.add_instruction(op::convert{type}, std::move(args));
}
instruction_ref
parse_concat(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
......@@ -515,6 +527,14 @@ struct tf_parser
return prog.add_instruction(op::reshape{new_dims}, args[0]);
}
instruction_ref
parse_gather(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
{
int axis = args[2]->eval().at<int32_t>();
op::gather op{axis};
return prog.add_instruction(op, {args[0], args[1]});
}
instruction_ref
parse_matmul(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
{
......@@ -530,6 +550,15 @@ struct tf_parser
transb = attributes.at("transpose_a").b();
}
if(contains(attributes, "adj_x"))
{
transa = attributes.at("adj_x").b();
}
if(contains(attributes, "adj_y"))
{
transb = attributes.at("adj_y").b();
}
std::vector<int64_t> perm(args[0]->get_shape().lens().size());
std::iota(perm.begin(), perm.end(), int64_t{0});
// swap the last two elements
......@@ -705,14 +734,24 @@ struct tf_parser
}
}
instruction_ref
parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
// template to facilitate the logsoftmax later
template <class Op>
instruction_ref parse_softmax(const std::string&,
const attribute_map& attributes,
std::vector<instruction_ref> args)
{
auto dims = args.front()->get_shape().lens();
auto r =
prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, args.front());
auto s = prog.add_instruction(op::softmax{}, r);
return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
int axis = -1;
auto num_dims = args[0]->get_shape().lens().size();
if(contains(attributes, "axis"))
{
axis = static_cast<int>(attributes.at("axis").i());
}
if(axis < 0)
{
axis += num_dims;
}
return prog.add_instruction(Op{axis}, make_contiguous(args[0]));
}
instruction_ref parse_squeeze(const std::string&,
......@@ -891,72 +930,56 @@ struct tf_parser
shape::type_t shape_type{};
switch(t)
{
case tensorflow::DataType::DT_INVALID:
break; // throw std::runtime_error("Unsupported type UNDEFINED");
case tensorflow::DataType::DT_FLOAT: shape_type = shape::float_type; break;
case tensorflow::DataType::DT_DOUBLE: shape_type = shape::double_type; break;
case tensorflow::DataType::DT_INT32: shape_type = shape::int32_type; break;
case tensorflow::DataType::DT_UINT8:
break; // throw std::runtime_error("Unsupported type UINT8");
case tensorflow::DataType::DT_INT16: shape_type = shape::int16_type; break;
case tensorflow::DataType::DT_INT8: shape_type = shape::int8_type; break;
case tensorflow::DataType::DT_INT64: shape_type = shape::int64_type; break;
case tensorflow::DataType::DT_UINT16: shape_type = shape::uint16_type; break;
case tensorflow::DataType::DT_HALF: shape_type = shape::half_type; break;
case tensorflow::DataType::DT_UINT32: shape_type = shape::uint32_type; break;
case tensorflow::DataType::DT_UINT64: shape_type = shape::uint64_type; break;
case tensorflow::DataType::DT_INVALID:
case tensorflow::DataType::DT_UINT8:
case tensorflow::DataType::DT_STRING:
break; // throw std::runtime_error("Unsupported type STRING");
case tensorflow::DataType::DT_COMPLEX64:
break; // throw std::runtime_error("Unsupported type COMPLEX64");
case tensorflow::DataType::DT_INT64: shape_type = shape::int64_type; break;
case tensorflow::DataType::DT_BOOL:
break; // throw std::runtime_error("Unsupported type BOOL");
case tensorflow::DataType::DT_QINT8:
break; // throw std::runtime_error("Unsupported type QINT8");
case tensorflow::DataType::DT_QUINT8:
break; // throw std::runtime_error("Unsupported type QUINT8");
case tensorflow::DataType::DT_QINT32:
break; // throw std::runtime_error("Unsupported type QINT32");
case tensorflow::DataType::DT_BFLOAT16:
break; // throw std::runtime_error("Unsupported type BFLOAT16");
case tensorflow::DataType::DT_QINT16:
break; // throw std::runtime_error("Unsupported type QINT16");
case tensorflow::DataType::DT_QUINT16:
break; // throw std::runtime_error("Unsupported type QUINT16");
case tensorflow::DataType::DT_UINT16: shape_type = shape::uint16_type; break;
case tensorflow::DataType::DT_COMPLEX128:
break; // throw std::runtime_error("Unsupported type COMPLEX128");
case tensorflow::DataType::DT_HALF: shape_type = shape::half_type; break;
case tensorflow::DataType::DT_RESOURCE:
break; // throw std::runtime_error("Unsupported type RESOURCE");
case tensorflow::DataType::DT_VARIANT:
break; // throw std::runtime_error("Unsupported type VARIANT");
case tensorflow::DataType::DT_UINT32: shape_type = shape::uint32_type; break;
case tensorflow::DataType::DT_UINT64:
shape_type = shape::uint64_type;
break;
// tf pb should not use these types
case tensorflow::DataType::DT_FLOAT_REF: break;
case tensorflow::DataType::DT_DOUBLE_REF: break;
case tensorflow::DataType::DT_INT32_REF: break;
case tensorflow::DataType::DT_UINT8_REF: break;
case tensorflow::DataType::DT_INT16_REF: break;
case tensorflow::DataType::DT_INT8_REF: break;
case tensorflow::DataType::DT_STRING_REF: break;
case tensorflow::DataType::DT_COMPLEX64_REF: break;
case tensorflow::DataType::DT_INT64_REF: break;
case tensorflow::DataType::DT_BOOL_REF: break;
case tensorflow::DataType::DT_QINT8_REF: break;
case tensorflow::DataType::DT_QUINT8_REF: break;
case tensorflow::DataType::DT_QINT32_REF: break;
case tensorflow::DataType::DT_BFLOAT16_REF: break;
case tensorflow::DataType::DT_QINT16_REF: break;
case tensorflow::DataType::DT_QUINT16_REF: break;
case tensorflow::DataType::DT_UINT16_REF: break;
case tensorflow::DataType::DT_COMPLEX128_REF: break;
case tensorflow::DataType::DT_HALF_REF: break;
case tensorflow::DataType::DT_RESOURCE_REF: break;
case tensorflow::DataType::DT_VARIANT_REF: break;
case tensorflow::DataType::DT_UINT32_REF: break;
case tensorflow::DataType::DT_UINT64_REF: break;
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_: break;
case tensorflow::DataType::DT_FLOAT_REF:
case tensorflow::DataType::DT_DOUBLE_REF:
case tensorflow::DataType::DT_INT32_REF:
case tensorflow::DataType::DT_UINT8_REF:
case tensorflow::DataType::DT_INT16_REF:
case tensorflow::DataType::DT_INT8_REF:
case tensorflow::DataType::DT_STRING_REF:
case tensorflow::DataType::DT_COMPLEX64_REF:
case tensorflow::DataType::DT_INT64_REF:
case tensorflow::DataType::DT_BOOL_REF:
case tensorflow::DataType::DT_QINT8_REF:
case tensorflow::DataType::DT_QUINT8_REF:
case tensorflow::DataType::DT_QINT32_REF:
case tensorflow::DataType::DT_BFLOAT16_REF:
case tensorflow::DataType::DT_QINT16_REF:
case tensorflow::DataType::DT_QUINT16_REF:
case tensorflow::DataType::DT_UINT16_REF:
case tensorflow::DataType::DT_COMPLEX128_REF:
case tensorflow::DataType::DT_HALF_REF:
case tensorflow::DataType::DT_RESOURCE_REF:
case tensorflow::DataType::DT_VARIANT_REF:
case tensorflow::DataType::DT_UINT32_REF:
case tensorflow::DataType::DT_UINT64_REF:
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_: break;
}
return shape_type;
......@@ -971,61 +994,59 @@ struct tf_parser
const std::string& s = t.tensor_content();
switch(t.dtype())
{
case tensorflow::DataType::DT_INVALID: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT:
return literal{{shape::float_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL:
case tensorflow::DataType::DT_INT8: return literal{{shape::int8_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT16:
return literal{{shape::uint16_type, dims}, s.data()};
case tensorflow::DataType::DT_INT16:
return literal{{shape::int16_type, dims}, s.data()};
case tensorflow::DataType::DT_INT32:
return literal{{shape::int32_type, dims}, s.data()};
case tensorflow::DataType::DT_INT64:
return literal{{shape::int64_type, dims}, s.data()};
case tensorflow::DataType::DT_STRING: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL: return literal{{shape::int8_type, dims}, s.data()};
case tensorflow::DataType::DT_HALF: return literal{{shape::half_type, dims}, s.data()};
case tensorflow::DataType::DT_DOUBLE:
return literal{{shape::double_type, dims}, s.data()};
case tensorflow::DataType::DT_UINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_DOUBLE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_STRING_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_HALF_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INVALID:
case tensorflow::DataType::DT_UINT8:
case tensorflow::DataType::DT_STRING:
case tensorflow::DataType::DT_UINT32:
case tensorflow::DataType::DT_UINT64:
case tensorflow::DataType::DT_COMPLEX64:
case tensorflow::DataType::DT_COMPLEX128:
case tensorflow::DataType::DT_QINT8:
case tensorflow::DataType::DT_QUINT8:
case tensorflow::DataType::DT_QINT32:
case tensorflow::DataType::DT_BFLOAT16:
case tensorflow::DataType::DT_QINT16:
case tensorflow::DataType::DT_QUINT16:
case tensorflow::DataType::DT_RESOURCE:
case tensorflow::DataType::DT_VARIANT:
case tensorflow::DataType::DT_FLOAT_REF:
case tensorflow::DataType::DT_DOUBLE_REF:
case tensorflow::DataType::DT_INT32_REF:
case tensorflow::DataType::DT_UINT8_REF:
case tensorflow::DataType::DT_INT16_REF:
case tensorflow::DataType::DT_INT8_REF:
case tensorflow::DataType::DT_STRING_REF:
case tensorflow::DataType::DT_COMPLEX64_REF:
case tensorflow::DataType::DT_INT64_REF:
case tensorflow::DataType::DT_BOOL_REF:
case tensorflow::DataType::DT_QINT8_REF:
case tensorflow::DataType::DT_QUINT8_REF:
case tensorflow::DataType::DT_QINT32_REF:
case tensorflow::DataType::DT_BFLOAT16_REF:
case tensorflow::DataType::DT_QINT16_REF:
case tensorflow::DataType::DT_QUINT16_REF:
case tensorflow::DataType::DT_UINT16_REF:
case tensorflow::DataType::DT_COMPLEX128_REF:
case tensorflow::DataType::DT_HALF_REF:
case tensorflow::DataType::DT_RESOURCE_REF:
case tensorflow::DataType::DT_VARIANT_REF:
case tensorflow::DataType::DT_UINT32_REF:
case tensorflow::DataType::DT_UINT64_REF:
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
}
......@@ -1033,11 +1054,9 @@ struct tf_parser
}
switch(t.dtype())
{
case tensorflow::DataType::DT_INVALID: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT:
return create_literal(
shape::float_type, dims, get_data_vals(t.float_val(), shape_size));
case tensorflow::DataType::DT_UINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8:
return create_literal(shape::int8_type, dims, get_data_vals(t.int_val(), shape_size));
case tensorflow::DataType::DT_UINT16:
......@@ -1049,7 +1068,6 @@ struct tf_parser
case tensorflow::DataType::DT_INT64:
return create_literal(
shape::int64_type, dims, get_data_vals(t.int64_val(), shape_size));
case tensorflow::DataType::DT_STRING: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL:
return create_literal(shape::int32_type, dims, get_data_vals(t.bool_val(), shape_size));
case tensorflow::DataType::DT_HALF:
......@@ -1065,43 +1083,45 @@ struct tf_parser
}
case tensorflow::DataType::DT_DOUBLE:
return literal{{shape::double_type, dims}, get_data_vals(t.double_val(), shape_size)};
case tensorflow::DataType::DT_UINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT: throw std::runtime_error("");
case tensorflow::DataType::DT_FLOAT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_DOUBLE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_STRING_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BOOL_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT8_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_BFLOAT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_QUINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT16_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_COMPLEX128_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_HALF_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_RESOURCE_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_VARIANT_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT32_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_UINT64_REF: throw std::runtime_error("");
case tensorflow::DataType::DT_INVALID:
case tensorflow::DataType::DT_UINT8:
case tensorflow::DataType::DT_STRING:
case tensorflow::DataType::DT_UINT32:
case tensorflow::DataType::DT_UINT64:
case tensorflow::DataType::DT_COMPLEX64:
case tensorflow::DataType::DT_COMPLEX128:
case tensorflow::DataType::DT_QINT8:
case tensorflow::DataType::DT_QUINT8:
case tensorflow::DataType::DT_QINT32:
case tensorflow::DataType::DT_BFLOAT16:
case tensorflow::DataType::DT_QINT16:
case tensorflow::DataType::DT_QUINT16:
case tensorflow::DataType::DT_RESOURCE:
case tensorflow::DataType::DT_VARIANT:
case tensorflow::DataType::DT_FLOAT_REF:
case tensorflow::DataType::DT_DOUBLE_REF:
case tensorflow::DataType::DT_INT32_REF:
case tensorflow::DataType::DT_UINT8_REF:
case tensorflow::DataType::DT_INT16_REF:
case tensorflow::DataType::DT_INT8_REF:
case tensorflow::DataType::DT_STRING_REF:
case tensorflow::DataType::DT_COMPLEX64_REF:
case tensorflow::DataType::DT_INT64_REF:
case tensorflow::DataType::DT_BOOL_REF:
case tensorflow::DataType::DT_QINT8_REF:
case tensorflow::DataType::DT_QUINT8_REF:
case tensorflow::DataType::DT_QINT32_REF:
case tensorflow::DataType::DT_BFLOAT16_REF:
case tensorflow::DataType::DT_QINT16_REF:
case tensorflow::DataType::DT_QUINT16_REF:
case tensorflow::DataType::DT_UINT16_REF:
case tensorflow::DataType::DT_COMPLEX128_REF:
case tensorflow::DataType::DT_HALF_REF:
case tensorflow::DataType::DT_RESOURCE_REF:
case tensorflow::DataType::DT_VARIANT_REF:
case tensorflow::DataType::DT_UINT32_REF:
case tensorflow::DataType::DT_UINT64_REF:
case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_:
throw std::runtime_error("");
}
......
......@@ -1808,6 +1808,20 @@ TEST_CASE(reduce_sum_axis12)
EXPECT(results_vector == gold);
}
TEST_CASE(rsqrt_test)
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {3}};
auto l = p.add_literal(migraphx::literal{s, {4.0, 16.0, 64.0}});
p.add_instruction(migraphx::op::rsqrt{}, l);
p.compile(migraphx::cpu::target{});
auto result = p.eval({});
std::vector<float> results_vector(3);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<float> gold = {0.5, 0.25, 0.125};
EXPECT(migraphx::verify_range(results_vector, gold));
}
TEST_CASE(reduce_mean_axis1)
{
migraphx::program p;
......
......@@ -3570,6 +3570,19 @@ struct test_reduce_sum_half : verify_program<test_reduce_sum_half>
};
};
struct test_rsqrt : verify_program<test_rsqrt>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {1, 3, 16, 16}};
auto x = p.add_parameter("x", s);
auto l0 = p.add_instruction(migraphx::op::clip{std::numeric_limits<float>::max(), 1.0}, x);
p.add_instruction(migraphx::op::rsqrt{}, l0);
return p;
};
};
struct test_reduce_mean : verify_program<test_reduce_mean>
{
migraphx::program create_program() const
......
 cast-example:F

xy"Cast*
to test_castZ
x



b
y


B
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment