merge changes from the develop branches.

01557ea0 · Shucai Xiao · ec1ab58b · 767ca0cc · 01557ea0 · 01557ea0
Commit 01557ea0 authored May 07, 2019 by Shucai Xiao
20 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -74,3 +74,8 @@ ENV LD_LIBRARY_PATH=$PREFIX/lib
 # Install doc requirements
 ADD doc/requirements.txt /doc-requirements.txt
 RUN pip install -r /doc-requirements.txt
+# Set up the UBSan environment to print stack traces
+RUN ln -s /usr/bin/llvm-symbolizer-5.0 /usr/local/bin/llvm-symbolizer
+ENV UBSAN_OPTIONS=print_stacktrace=1
+ENV ASAN_OPTIONS=detect_stack_use_after_return=1:check_initialization_order=1:strict_init_order=1
--- a/src/include/migraphx/instruction.hpp
+++ b/src/include/migraphx/instruction.hpp
@@ -24,7 +24,7 @@ struct instruction
    instruction(literal l);
-    void replace(const shape& r);
+    void replace(operation o);
    void recompute_shape();
@@ -90,7 +90,8 @@ struct instruction
    // internal
    void replace_argument(instruction_ref old, instruction_ref new_ins);
-    private:
+    void replace(const shape& r);
    operation op;
    shape result;
    std::vector<instruction_ref> output;

--- a/src/include/migraphx/op/abnormal_ops.hpp
+++ b/src/include/migraphx/op/abnormal_ops.hpp
@@ -39,6 +39,11 @@ struct undefined
 struct unknown
 {
    std::string op;
+    // Reflection hook: passes the `op` string member to the selector `f` under the label "op".
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.op, "op"));
+    }
    std::string name() const { return "unknown:" + op; }
    shape compute_shape(std::vector<shape> input) const
    {

--- a/src/include/migraphx/op/clip.hpp
+++ b/src/include/migraphx/op/clip.hpp
+#ifndef MIGRAPHX_GUARD_OPERATORS_CLIP_HPP
+#define MIGRAPHX_GUARD_OPERATORS_CLIP_HPP
+#include <array>
+#include <migraphx/op/unary.hpp>
+#include <migraphx/operation.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/streamutils.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <cmath>
+#include <utility>
+#include <limits>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/// Element-wise clamp operator: each input value x maps to min(max(min_val, x), max_val).
+struct clip : unary<clip>
+{
+    /// Upper bound; defaults to the largest finite float.
+    float max_val = std::numeric_limits<float>::max();
+    /// Lower bound; defaults to the most negative finite float.
+    /// numeric_limits<float>::min() is the smallest *positive* normalized value, so using it
+    /// as the default would clamp every negative input; lowest() is the intended "no bound".
+    float min_val = std::numeric_limits<float>::lowest();
+    clip() {}
+    /// Note the argument order: upper bound first, then lower bound.
+    clip(float max, float min) : max_val(max), min_val(min) {}
+    /// Returns the per-element functor. The bounds are copied into the closure and converted
+    /// to the element type of the value being clamped.
+    auto apply() const
+    {
+        auto max = max_val;
+        auto min = min_val;
+        return [max, min](auto x) {
+            using type = decltype(x);
+            return std::min(std::max(type(min), x), type(max));
+        };
+    }
+    /// Reflection hook: exposes max_val as "max" and min_val as "min".
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.max_val, "max"), f(self.min_val, "min"));
+    }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/concat.hpp
+++ b/src/include/migraphx/op/concat.hpp
@@ -19,6 +19,13 @@ namespace op {
 struct concat
 {
    std::size_t axis = 0;
+    // Reflection hook: passes the `axis` member to the selector `f` under the label "axis".
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.axis, "axis"));
+    }
    std::string name() const { return "concat"; }
    std::vector<std::size_t> compute_offsets(const shape& output_shape,
                                             const std::vector<argument>& args) const

--- a/src/include/migraphx/op/leaky_relu.hpp
+++ b/src/include/migraphx/op/leaky_relu.hpp
@@ -18,19 +18,20 @@ namespace op {
 struct leaky_relu
 {
-    std::string name() const { return "leaky_relu"; }
+    // NOTE(review): alpha has no default member initializer, so a default-initialized
+    // leaky_relu reads an indeterminate value — confirm every caller sets it.
    float alpha;
-    shape compute_shape(std::vector<shape> inputs) const
-    {
-        check_shapes{inputs, *this}.has(1);
-        return inputs.front();
-    }
+    // Reflection hook: passes `alpha` to the selector `f` under the label "alpha".
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.alpha, "alpha"));
    }
+    // name() and compute_shape() are moved below reflect(); their bodies are unchanged.
+    std::string name() const { return "leaky_relu"; }
+    // Requires exactly one input and returns that input's shape unchanged.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this}.has(1);
+        return inputs.front();
+    }
 };
 } // namespace op

--- a/src/include/migraphx/operation.hpp
+++ b/src/include/migraphx/operation.hpp
@@ -69,7 +69,7 @@ auto operator<<(std::ostream& os, const T& x) -> decltype(os << x.name())
 {
    os << x.name();
    char delim = '[';
-    reflect_each(x, [&](auto& y, auto name) {
+    reflect_each(x, [&](auto&& y, auto name) {
        os << delim;
        os << name << "=";
        stream_write_value(os, y);
@@ -87,6 +87,8 @@ namespace operation_equal {
 template <class T, class U>
 auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
 {
+    static_assert(is_reflectable<T>{} or sizeof(T) <= 1,
+                  "Missing equality operator or reflect method.");
    if(x.name() != y.name())
        return false;
    const auto& yy = any_cast<T>(y);

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -11,6 +11,7 @@
 #include <migraphx/op/batch_norm.hpp>
 #include <migraphx/op/binary.hpp>
 #include <migraphx/op/broadcast.hpp>
+#include <migraphx/op/clip.hpp>
 #include <migraphx/op/common.hpp>
 #include <migraphx/op/concat.hpp>
 #include <migraphx/op/contiguous.hpp>

--- a/src/include/migraphx/reflect.hpp
+++ b/src/include/migraphx/reflect.hpp
@@ -11,6 +11,15 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace detail {
+// Stand-in selector that accepts any arguments and returns 0. It is passed to
+// T::reflect inside the unevaluated reflectable_impl check, where only well-formedness matters.
+struct reflect_placeholder
+{
+    template <class... Ts>
+    int operator()(Ts&&...) const
+    {
+        return 0;
+    }
+};
 template <class T, class Selector>
 auto reflect_impl(rank<1>, T& x, Selector f) -> decltype(T::reflect(x, f))
 {
@@ -23,8 +32,53 @@ auto reflect_impl(rank<0>, T&, Selector)
    return pack();
 }
+// Detection overloads, declared but never defined: they are only used inside decltype.
+// The rank<1> overload is viable only when T::reflect(x, reflect_placeholder{}) is well-formed
+// and yields std::true_type.
+template <class T>
+auto reflectable_impl(rank<1>, T&& x)
+    -> decltype(T::reflect(x, reflect_placeholder{}), std::true_type{});
+// Fallback overload yielding std::false_type.
+// (presumably chosen only when the rank<1> overload is not viable — confirm against rank's definition)
+template <class T>
+auto reflectable_impl(rank<0>, T &&) -> decltype(std::false_type{});
+// Type trait: maps T&& to T and leaves every other type (including T&) unchanged.
+template <class T>
+struct remove_rvalue_reference
+{
+    using type = T;
+};
+// Specialization that strips the rvalue reference.
+template <class T>
+struct remove_rvalue_reference<T&&>
+{
+    using type = T;
+};
+// Holder for a reflected member. Because `type` strips only rvalue references, an lvalue
+// reference T& is stored as a reference, while T&& (or a plain T) is stored by value.
+template <class T>
+struct wrapper
+{
+    using type = typename remove_rvalue_reference<T>::type;
+    type data;
+    // Returns the stored reference, or a copy of the stored value when held by value.
+    type get() const { return data; }
+};
+// Builds a wrapper<T> from x. T must be given explicitly (callers pass decltype(y));
+// std::forward<T> restores the value category encoded in T.
+template <class T>
+wrapper<T> wrap(std::remove_reference_t<T>& x)
+{
+    return wrapper<T>{std::forward<T>(x)};
+}
+// Tuple type whose elements keep lvalue references but turn rvalue references into values.
+template <class... Ts>
+using auto_tuple_t = std::tuple<typename remove_rvalue_reference<Ts>::type...>;
+// Builds an auto_tuple_t from the arguments, forwarding each one into its element.
+template <class... Ts>
+auto_tuple_t<Ts...> auto_tuple(Ts&&... xs)
+{
+    return auto_tuple_t<Ts...>{std::forward<Ts>(xs)...};
+}
 } // namespace detail
+// Alias for the return type of detail::reflectable_impl called with rank<1>{} and a T:
+// std::true_type when the T::reflect-based overload is selected, std::false_type otherwise.
+template <class T>
+using is_reflectable = decltype(detail::reflectable_impl(rank<1>{}, std::declval<T>()));
 template <class T, class Selector>
 auto reflect(T& x, Selector f)
 {
@@ -34,17 +88,18 @@ auto reflect(T& x, Selector f)
 template <class T>
 auto reflect_tie(T& x)
 {
+    // Each reflected member is wrapped with detail::wrap, and the unwrapped values are then
+    // collected with detail::auto_tuple (replacing the former std::ref / std::tie pair).
-    return reflect(x, [](auto&& y, auto&&...) { return std::ref(y); })(
+    return reflect(x, [](auto&& y, auto&&...) { return detail::wrap<decltype(y)>(y); })(
-        [](auto&&... xs) { return std::tie(xs.get()...); });
+        [](auto&&... xs) { return detail::auto_tuple(xs.get()...); });
 }
 template <class T, class F>
 void reflect_each(T& x, F f)
 {
+    // Each reflected member is packed as (wrapped value, extra selector arguments...); f is then
+    // invoked once per member with the unwrapped value followed by those extra arguments.
-    return reflect(x, [](auto&& y, auto... ys) { return pack(std::ref(y), ys...); })(
+    return reflect(x, [](auto&& y, auto... ys) {
-        [&](auto&&... xs) {
+        return pack(detail::wrap<decltype(y)>(y), ys...);
-            each_args([&](auto p) { p([&](auto&& y, auto... ys) { f(y.get(), ys...); }); }, xs...);
+    })([&](auto&&... xs) {
-        });
+        each_args([&](auto p) { p([&](auto&& y, auto... ys) { f(y.get(), ys...); }); }, xs...);
+    });
 }
 } // namespace MIGRAPHX_INLINE_NS

--- a/src/instruction.cpp
+++ b/src/instruction.cpp
@@ -28,6 +28,12 @@ void instruction::replace(const shape& r)
    }
 }
+// Replaces this instruction's operator with `o`, then recomputes the result shape from the
+// new operator and the current arguments (recompute_shape calls compute_shape(op, arguments)).
+void instruction::replace(operation o)
+{
+    op = std::move(o);
+    recompute_shape();
+}
 void instruction::recompute_shape() { replace(compute_shape(op, arguments)); }
 void instruction::clear_arguments()

--- a/src/onnx/onnx.cpp
+++ b/src/onnx/onnx.cpp
@@ -63,6 +63,7 @@ struct onnx_parser
        add_variadic_op("Max", op::max{});
        add_variadic_op("Min", op::min{});
+        add_mem_op("Clip", &onnx_parser::parse_clip);
        add_mem_op("LRN", &onnx_parser::parse_lrn);
        add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
        add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
@@ -225,6 +226,22 @@ struct onnx_parser
        });
    }
+    // Handles an ONNX "Clip" node: starts from a default op::clip, overrides each bound
+    // for which the node carries a "max" / "min" attribute, and appends the instruction.
+    instruction_ref parse_clip(const std::string&,
+                               const attribute_map& attributes,
+                               std::vector<instruction_ref> args)
+    {
+        op::clip clip_op;
+        const bool has_upper = contains(attributes, "max");
+        if(has_upper)
+            clip_op.max_val = parse_value(attributes.at("max")).at<float>();
+        const bool has_lower = contains(attributes, "min");
+        if(has_lower)
+            clip_op.min_val = parse_value(attributes.at("min")).at<float>();
+        return prog.add_instruction(clip_op, std::move(args));
+    }
    instruction_ref
    parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
    {

--- a/src/program.cpp
+++ b/src/program.cpp
@@ -63,11 +63,16 @@ static void print_program(const program& p, F print_func)
    for(auto ins : iterator_for(p))
    {
-        std::string var_name = "@" + std::to_string(count);
+        std::string var_name;
        if(ins->name() == "@param")
        {
            var_name = any_cast<builtin::param>(ins->get_operator()).parameter;
        }
+        else
+        {
+            var_name = "@" + std::to_string(count);
+            count++;
+        }
        names.emplace(ins, var_name);
        // TODO: Use all_of
@@ -78,8 +83,6 @@ static void print_program(const program& p, F print_func)
        }
        print_func(ins, names);
-        count++;
    }
 }

--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -48,6 +48,12 @@ struct cpu_batch_norm_inference
 {
    op::batch_norm_inference op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "cpu::batch_norm_inference"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -107,6 +113,12 @@ struct cpu_lrn
 {
    op::lrn op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "cpu::lrn"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, shape output_shape, std::vector<argument> args) const
@@ -144,6 +156,12 @@ struct cpu_convolution
 {
    op::convolution op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "cpu::convolution"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, shape output_shape, std::vector<argument> args) const
@@ -190,6 +208,12 @@ struct cpu_im2col
 {
    op::im2col op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    static std::string name() { return "cpu::im2col"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -271,6 +295,12 @@ struct cpu_pooling
 {
    op::pooling op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "cpu::pooling_" + Op::name(); }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
@@ -315,20 +345,35 @@ struct cpu_pooling
    }
 };
-struct cpu_contiguous
+struct cpu_op
 {
-    op::contiguous op;
+    // The wrapped operator is now the type-erased `operation`, not a specific op type.
+    operation op;
-    std::string name() const { return "cpu::contiguous"; }
+    // The name is derived from the wrapped operator: "cpu::" followed by its own name.
+    std::string name() const { return "cpu::" + op.name(); }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    // The context argument is ignored; the call forwards to the wrapped operator's compute.
+    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
    {
-        return op.compute(output_shape, std::move(args));
+        return op.compute(output_shape, args);
    }
+    // Two cpu_op values are equal when their wrapped operators compare equal.
+    friend bool operator==(const cpu_op& x, const cpu_op& y) { return x.op == y.op; }
+    // Comparison against a type-erased operation: names must match first, then the operation
+    // is cast to cpu_op. NOTE(review): this relies on no other operator type reporting a
+    // "cpu::<name>" string that a cpu_op can also produce — confirm.
+    friend bool operator==(const cpu_op& x, const operation& y)
+    {
+        if(x.name() != y.name())
+            return false;
+        return x == any_cast<cpu_op>(y);
+    }
+    // Symmetric overload; reuses the comparison above.
+    friend bool operator==(const operation& x, const cpu_op& y) { return y == x; }
 };
 struct cpu_pad
 {
    op::pad op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    // Report the pad op under its own name. The previous "cpu::contiguous" was a copy-paste
    // leftover and collides with the name cpu_op produces when it wraps a contiguous op.
    std::string name() const { return "cpu::pad"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
@@ -352,20 +397,15 @@ struct cpu_pad
    }
 };
-struct cpu_concat
-{
-    op::concat op;
-    std::string name() const { return "cpu::concat"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
 struct cpu_gemm
 {
    op::dot op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "cpu::dot"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
@@ -408,162 +448,6 @@ struct cpu_gemm
    }
 };
-struct cpu_gather
-{
-    op::gather op;
-    std::string name() const { return "cpu::gather"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
-struct identity_op
-{
-    std::string name() const { return "cpu::identity"; }
-    auto fcn() const
-    {
-        return [](auto x) { return x; };
-    }
-};
-struct abs_op
-{
-    std::string name() const { return "cpu::abs"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::abs(make_signed(x)); };
-    }
-};
-struct exp_op
-{
-    std::string name() const { return "cpu::exp"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::exp(x); };
-    }
-};
-struct log_op
-{
-    std::string name() const { return "cpu::log"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::log(x); };
-    }
-};
-struct sin_op
-{
-    std::string name() const { return "cpu::sin"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::sin(x); };
-    }
-};
-struct cos_op
-{
-    std::string name() const { return "cpu::cos"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::cos(x); };
-    }
-};
-struct tan_op
-{
-    std::string name() const { return "cpu::tan"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::tan(x); };
-    }
-};
-struct asin_op
-{
-    std::string name() const { return "cpu::asin"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::asin(x); };
-    }
-};
-struct acos_op
-{
-    std::string name() const { return "cpu::acos"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::acos(x); };
-    }
-};
-struct atan_op
-{
-    std::string name() const { return "cpu::atan"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::atan(x); };
-    }
-};
-struct sinh_op
-{
-    std::string name() const { return "cpu::sinh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::sinh(x); };
-    }
-};
-struct cosh_op
-{
-    std::string name() const { return "cpu::cosh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::cosh(x); };
-    }
-};
-struct tanh_op
-{
-    std::string name() const { return "cpu::tanh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::tanh(x); };
-    }
-};
-struct sigmoid_op
-{
-    std::string name() const { return "cpu::sigmoid"; }
-    auto fcn() const
-    {
-        return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
-    }
-};
-struct neg_op
-{
-    std::string name() const { return "cpu::neg"; }
-    auto fcn() const
-    {
-        return [](auto x) { return -x; };
-    }
-};
-struct relu_op
-{
-    std::string name() const { return "cpu::relu"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::max(decltype(x){0}, x); };
-    }
-};
 struct leaky_relu_op
 {
    op::leaky_relu op;
@@ -590,6 +474,12 @@ template <typename Op>
 struct cpu_unary
 {
    Op op;
+    // Reflection hook: reflects the operator nested inside the functor wrapper (self.op.op),
+    // so this requires the Op template argument to have an `op` member of its own.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op.op, f);
+    }
    std::string name() const { return op.name(); }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
@@ -668,6 +558,13 @@ struct softmax2d
 struct cpu_logsoftmax
 {
    op::logsoftmax op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "cpu::logsoftmax"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -734,116 +631,6 @@ struct cpu_logsoftmax
    }
 };
-struct cpu_convert
-{
-    op::convert op;
-    std::string name() const { return "cpu_convert"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
-struct add_op
-{
-    std::string name() const { return "add"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x + y; };
-    }
-};
-struct sub_op
-{
-    std::string name() const { return "sub"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x - y; };
-    }
-};
-struct mul_op
-{
-    std::string name() const { return "mul"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x * y; };
-    }
-};
-struct div_op
-{
-    std::string name() const { return "div"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x / y; };
-    }
-};
-struct max_op
-{
-    std::string name() const { return "max"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return std::max(x, y); };
-    }
-};
-struct min_op
-{
-    std::string name() const { return "min"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return std::min(x, y); };
-    }
-};
-template <typename Op>
-struct cpu_binary
-{
-    Op op;
-    std::string name() const { return "cpu::" + op.name(); }
-    shape compute_shape(const std::vector<shape>& inputs) const
-    {
-        check_shapes{inputs}.has(2).same_type().same_dims();
-        auto s0 = inputs.at(0);
-        auto s1 = inputs.at(1);
-        if(s0 == s1 and s0.packed())
-        {
-            return s0;
-        }
-        else
-        {
-            return {s0.type(), s0.lens()};
-        }
-    }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        argument result{output_shape};
-        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
-            auto s1 = input1.get_shape();
-            auto s2 = input2.get_shape();
-            if(s1 == s2 and s1.standard())
-            {
-                std::transform(
-                    input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
-            }
-            else
-            {
-                shape_for_each(output.get_shape(), [&](const auto& idx) {
-                    output(idx.begin(), idx.end()) =
-                        op.fcn()(input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
-                });
-            }
-        });
-        return result;
-    }
-};
 struct cpu_apply
 {
    program* prog;
@@ -863,44 +650,17 @@ struct cpu_apply
    void init()
    {
-        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
-        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
-        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
        apply_map["batch_norm_inference"] =
            extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
-        apply_map["lrn"]        = extend_op<cpu_lrn, op::lrn>();
+        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
-        apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
+        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
-        apply_map["pad"]        = extend_op<cpu_pad, op::pad>();
+        apply_map["elu"]         = extend_op<cpu_unary<elu_op>, op::elu>();
-        apply_map["concat"]     = extend_op<cpu_concat, op::concat>();
+        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
-        apply_map["gather"]     = extend_op<cpu_gather, op::gather>();
+        apply_map["leaky_relu"]  = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
-        apply_map["convert"]    = extend_op<cpu_convert, op::convert>();
+        apply_map["logsoftmax"]  = extend_op<cpu_logsoftmax, op::logsoftmax>();
-        apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
+        apply_map["lrn"]         = extend_op<cpu_lrn, op::lrn>();
-        apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
+        apply_map["pad"]         = extend_op<cpu_pad, op::pad>();
-        apply_map["elu"]        = extend_op<cpu_unary<elu_op>, op::elu>();
+        apply_map["softmax"]     = simple_op<softmax2d>();
-        apply_map["identity"]   = simple_op<cpu_unary<identity_op>>();
-        apply_map["abs"]        = simple_op<cpu_unary<abs_op>>();
-        apply_map["sinh"]       = simple_op<cpu_unary<sinh_op>>();
-        apply_map["cosh"]       = simple_op<cpu_unary<cosh_op>>();
-        apply_map["tanh"]       = simple_op<cpu_unary<tanh_op>>();
-        apply_map["sigmoid"]    = simple_op<cpu_unary<sigmoid_op>>();
-        apply_map["exp"]        = simple_op<cpu_unary<exp_op>>();
-        apply_map["log"]        = simple_op<cpu_unary<log_op>>();
-        apply_map["neg"]        = simple_op<cpu_unary<neg_op>>();
-        apply_map["sin"]        = simple_op<cpu_unary<sin_op>>();
-        apply_map["cos"]        = simple_op<cpu_unary<cos_op>>();
-        apply_map["tan"]        = simple_op<cpu_unary<tan_op>>();
-        apply_map["asin"]       = simple_op<cpu_unary<asin_op>>();
-        apply_map["acos"]       = simple_op<cpu_unary<acos_op>>();
-        apply_map["atan"]       = simple_op<cpu_unary<atan_op>>();
-        apply_map["relu"]       = simple_op<cpu_unary<relu_op>>();
-        apply_map["add"]        = simple_op<cpu_binary<add_op>>();
-        apply_map["sub"]        = simple_op<cpu_binary<sub_op>>();
-        apply_map["mul"]        = simple_op<cpu_binary<mul_op>>();
-        apply_map["div"]        = simple_op<cpu_binary<div_op>>();
-        apply_map["max"]        = simple_op<cpu_binary<max_op>>();
-        apply_map["min"]        = simple_op<cpu_binary<min_op>>();
-        apply_map["softmax"] = simple_op<softmax2d>();
    }
    void apply()
@@ -916,9 +676,18 @@ struct cpu_apply
            {
                apply_map.at(it->name())(it);
            }
+            else if(is_context_free(it->get_operator()))
+            {
+                apply_cpu_op(it);
+            }
        }
    }
+    // Replaces `ins` in the program with a cpu_op wrapping the instruction's current operator,
+    // keeping the same inputs.
+    void apply_cpu_op(instruction_ref ins)
+    {
+        prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());
+    }
    template <class T>
    void apply_simple_op(instruction_ref ins)
    {

--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -33,6 +33,7 @@ add_library(migraphx_device
    device/pad.cpp
    device/gather.cpp
    device/sub.cpp
+    device/clip.cpp
 )
 set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
 rocm_clang_tidy_check(migraphx_device)
@@ -67,6 +68,7 @@ add_library(migraphx_gpu
    lrn.cpp
    schedule_model.cpp
    adjust_allocation.cpp
+    clip.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)

--- a/src/targets/gpu/clip.cpp
+++ b/src/targets/gpu/clip.cpp
+#include <migraphx/gpu/clip.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/device/clip.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+// Drops the last input shape and delegates to op::clip's shape computation on the rest.
+// (the last input is presumably the output buffer, since compute() returns args.back() — confirm)
+// NOTE(review): pop_back on an empty vector is undefined; assumes at least one input.
+shape hip_clip::compute_shape(std::vector<shape> inputs) const
+{
+    inputs.pop_back();
+    return op.compute_shape(inputs);
+}
+// Runs the device clip on the context's stream, with args.back() as the result argument and
+// args.front() as the input, using the op's max/min bounds; returns args.back().
+// The output_shape parameter is unused.
+argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
+{
+    device::clip(ctx.get_stream().get(), args.back(), args.front(), op.max_val, op.min_val);
+    return args.back();
+}
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/device/clip.cpp
+++ b/src/targets/gpu/device/clip.cpp
+#include <migraphx/gpu/device/clip.hpp>
+#include <migraphx/gpu/device/nary.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+namespace device {
+// Applies min(max(min, x), max) to arg1 through nary() on the given stream, with `result`
+// as the output argument. The explicit <decltype(x)> template arguments make the float
+// bounds convert to the element type before comparison.
+void clip(hipStream_t stream,
+          const argument& result,
+          const argument& arg1,
+          const float max,
+          const float min)
+{
+    nary(stream, result, arg1)(
+        [max, min](auto x) { return std::min<decltype(x)>(std::max<decltype(x)>(min, x), max); });
+}
+} // namespace device
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/include/migraphx/gpu/abs.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/abs.hpp
@@ -13,6 +13,13 @@ struct context;
 struct miopen_abs
 {
    shared<activation_descriptor> ad;
+    // Reflection hook: delegates to gpu::reflect on the raw activation descriptor held by `ad`.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return gpu::reflect(self.ad.get(), f);
+    }
    std::string name() const { return "gpu::abs"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument

--- a/src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
@@ -13,6 +13,13 @@ struct context;
 struct miopen_batch_norm_inference
 {
    op::batch_norm_inference op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "gpu::batch_norm_inference"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument

--- a/src/targets/gpu/include/migraphx/gpu/clip.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/clip.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
+#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
+#include <migraphx/shape.hpp>
+#include <migraphx/op/clip.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+struct context;
+// GPU counterpart of op::clip, reported under the name "gpu::clip".
+struct hip_clip
+{
+    // Wrapped clip operator carrying the max/min bounds.
+    op::clip op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+    std::string name() const { return "gpu::clip"; }
+    // compute_shape and compute are declared here and defined out of line.
+    shape compute_shape(std::vector<shape> inputs) const;
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
+    // Returns the index of the last input (shapes.size() - 1) as the aliased output.
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
+};
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/targets/gpu/include/migraphx/gpu/concat.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/concat.hpp
@@ -14,6 +14,12 @@ struct hip_concat
 {
    op::concat op;
+    // Reflection hook: delegates to migraphx::reflect on the wrapped op with the same selector.
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
    std::string name() const { return "gpu::concat"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument