Merge

5a1af3d1 · Paul · dfc7bbac · 6e94e607 · 5a1af3d1 · 5a1af3d1
Commit 5a1af3d1 authored May 31, 2022 by Paul
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,8 @@ rocm_enable_cppcheck(
        useSmartPointer:*make_shared_array.hpp
        constParameter:*src/targets/gpu/*.cpp
        constParameter:*src/targets/gpu/*.hpp
+        # Suppress mlir_conv.cpp since this file will be deleted
+        *:*src/targets/gpu/mlir_conv.cpp
    FORCE
    INCONCLUSIVE
    RULE_FILE

--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,6 +2,6 @@ pfultz2/rocm-recipes
 facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
 ccache@v4.1
 pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
-danmar/cppcheck@2.6 -DHAVE_RULES=1
+danmar/cppcheck@2.8 -DHAVE_RULES=1
 RadeonOpenCompute/rocm-cmake@1ebf7e7bc61bb5e949c171562b421264065230a7 --build
 -f requirements.txt
--- a/examples/nlp/python_bert_squad/BERT-Squad.ipynb
+++ b/examples/nlp/python_bert_squad/BERT-Squad.ipynb
@@ -62,7 +62,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "!wget -nc https://github.com/onnx/models/blob/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx"
+    "!wget -nc https://github.com/onnx/models/raw/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx"
   ]
  },
  {

--- a/examples/nlp/python_bert_squad/README.md
+++ b/examples/nlp/python_bert_squad/README.md
@@ -23,7 +23,7 @@ unzip uncased_L-12_H-768_A-12.zip
 ```
 5) Get BERT ONNX model (bertsquad-10.onnx):
 ```
-wget https://github.com/onnx/models/blob/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx
+wget https://github.com/onnx/models/raw/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx
 ```
 6) Run the inference, it will compile and run the model on three questions and small data provided in `inputs.json`:
 ```

--- a/examples/nlp/python_bert_squad/requirements_bertsquad.txt
+++ b/examples/nlp/python_bert_squad/requirements_bertsquad.txt
-tensorflow==2.5.3
+tensorflow==2.7.2
 onnxruntime
 tokenizers
\ No newline at end of file
--- a/examples/vision/python_yolov4/yolov4_inference.ipynb
+++ b/examples/vision/python_yolov4/yolov4_inference.ipynb
@@ -24,16 +24,16 @@
    "import os.path\n",
    "\n",
    "if not os.path.exists(\"./utilities/coco.names\"):\n",
-    "    !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/dependencies/coco.names -P ./utilities/\n",
+    "    !wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/dependencies/coco.names -P ./utilities/\n",
    "if not os.path.exists(\"./utilities/yolov4_anchors.txt\"):\n",
-    "    !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt -P ./utilities/\n",
+    "    !wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt -P ./utilities/\n",
    "if not os.path.exists(\"./utilities/input.jpg\"):\n",
    "    # The image used is from the COCO dataset (https://cocodataset.org/#explore)\n",
    "    # Other images can be tested by replacing the link below\n",
    "    image_link = \"https://farm3.staticflickr.com/2009/2306189268_88cc86b30f_z.jpg\"\n",
    "    !wget -O ./utilities/input.jpg $image_link\n",
    "if not os.path.exists(\"./utilities/yolov4.onnx\"):\n",
-    "    !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/model/yolov4.onnx -P ./utilities/"
+    "    !wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/model/yolov4.onnx -P ./utilities/"
   ]
  },
  {

--- a/src/api/include/migraphx/migraphx.hpp
+++ b/src/api/include/migraphx/migraphx.hpp
@@ -39,10 +39,7 @@ template <class T, class F, class... Ts>
 T* make(F f, Ts&&... xs)
 {
    T* result = nullptr;
-    // cppcheck-suppress redundantInitialization
-    // cppcheck-suppress redundantAssignment
-    // cppcheck-suppress unreadVariable
-    auto e = f(&result, std::forward<Ts>(xs)...);
+    auto e    = f(&result, std::forward<Ts>(xs)...);
    if(e != migraphx_status_success)
        throw std::runtime_error("Failed to call function");
    return result;
@@ -51,9 +48,6 @@ T* make(F f, Ts&&... xs)
 template <class F, class... Ts>
 void call(F f, Ts&&... xs)
 {
-    // cppcheck-suppress redundantInitialization
-    // cppcheck-suppress redundantAssignment
-    // cppcheck-suppress unreadVariable
    auto e = f(std::forward<Ts>(xs)...);
    if(e != migraphx_status_success)
        throw std::runtime_error("Failed to call function");
@@ -340,7 +334,6 @@ struct interface_base : Base
    template <class T, class Setter, class F>
    void set_auto_fp(Setter setter, F f)
    {
-        // cppcheck-suppress constParameter
        return set_fp<T>(setter, [=](T& obj, auto out, auto... xs) {
            auto_invoke(f, out, obj, auto_convert_param(rank<2>{}, xs)...);
        });

--- a/src/argument.cpp
+++ b/src/argument.cpp
@@ -29,7 +29,6 @@ void argument::assign_buffer(std::function<char*()> d)
    // Collect all shapes
    std::unordered_map<std::size_t, shape> shapes;
    {
-        // cppcheck-suppress variableScope
        std::size_t i = 0;
        fix([&](auto self, auto ss) {
            if(ss.sub_shapes().empty())
@@ -60,7 +59,6 @@ void argument::assign_buffer(std::function<char*()> d)
    }
    assert(offset == s.bytes());

-    // cppcheck-suppress variableScope
    std::size_t i = 0;
    m_data        = fix<data_t>([&](auto self, auto ss) {
        data_t result;

--- a/src/eliminate_contiguous.cpp
+++ b/src/eliminate_contiguous.cpp
@@ -6,6 +6,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/op/contiguous.hpp>
 #include <migraphx/op/identity.hpp>
+#include <migraphx/par_for.hpp>
 #include <utility>

 namespace migraphx {
@@ -73,6 +74,8 @@ template <class F>
 static void remove_contiguous(const std::string& op_name, module& m, F f)
 {
    auto last = std::prev(m.end());
+    std::vector<instruction_ref> const_instruction;
+
    for(auto ins : iterator_for(m))
    {
        // return instruction should have inputs with standard shape
@@ -89,6 +92,7 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
        auto args     = ins->inputs();
        auto new_args = args;
        auto mod_args = ins->module_inputs();
+
        for(auto arg : ins->inputs())
        {
            if(arg->name() != op_name)
@@ -101,14 +105,33 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
            }
            else if(prev->can_eval())
            {
-                auto c = op::contiguous{};
-                auto r = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
-
-                auto l = m.add_literal(r.get_shape(), r.data());
-                m.replace_instruction(arg, l);
+                auto prev = arg->inputs().front();
+                replace(new_args, arg, prev);
+                if(try_compute_shape(ins, new_args, mod_args))
+                {
+                    instruction::replace_argument(ins, arg, prev);
+                }
+                else if(prev->can_eval())
+                {
+                    const_instruction.push_back(arg);
+                }
            }
        }
    }
+
+    // Perform evaluations in parallel
+    std::vector<argument> literals(const_instruction.size());
+    par_for(const_instruction.size(), 1, [&](const auto i) {
+        auto c      = op::contiguous{};
+        auto prev   = const_instruction[i]->inputs().front();
+        literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
+    });
+
+    for(size_t i = 0; i < const_instruction.size(); i++)
+    {
+        auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
+        m.replace_instruction(const_instruction[i], l);
+    }
 }

 void eliminate_contiguous::apply(module& m) const

--- a/src/include/migraphx/matcher.hpp
+++ b/src/include/migraphx/matcher.hpp
@@ -754,10 +754,16 @@ auto skip_broadcasts(Ms... ms)
    return skip(name("broadcast", "multibroadcast", "contiguous"))(ms...);
 }

+template <class... Ms> // Same skip list as skip_broadcasts, with "convert" added
+auto skip_broadcasts_converts(Ms... ms) // ms... are handed unchanged to the skip(...) matcher
+{
+    return skip(name("broadcast", "multibroadcast", "contiguous", "convert"))(ms...);
+}
+
 template <class T>
 inline auto has_value(T x, float tolerance = 1e-6)
 {
-    return skip_broadcasts(make_basic_pred_matcher([=](instruction_ref ins) {
+    return skip_broadcasts_converts(make_basic_pred_matcher([=](instruction_ref ins) {
        if(ins->name() != "@literal")
            return false;
        auto l = ins->get_literal();

--- a/src/include/migraphx/raw_data.hpp
+++ b/src/include/migraphx/raw_data.hpp
@@ -207,8 +207,7 @@ auto visit_all_pack(const shape& s, V1&& v1)
 template <class T, class... Ts>
 auto visit_all(T&& x, Ts&&... xs)
 {
-    auto&& s = x.get_shape();
-    // cppcheck-suppress redundantInitialization
+    auto&& s                                   = x.get_shape();
    std::initializer_list<shape::type_t> types = {xs.get_shape().type()...};
    if(!std::all_of(types.begin(), types.end(), [&](shape::type_t t) { return t == s.type(); }))
        MIGRAPHX_THROW("Types must be the same");

--- a/src/include/migraphx/serialize.hpp
+++ b/src/include/migraphx/serialize.hpp
@@ -50,7 +50,6 @@ auto to_value_impl(rank<2>, const T& x) -> decltype(x.begin(), x.end(), value{})
    value result = value::array{};
    for(auto&& y : x)
    {
-        auto e = to_value(y);
        result.insert(to_value(y));
    }
    return result;

--- a/src/include/migraphx/tensor_view.hpp
+++ b/src/include/migraphx/tensor_view.hpp
@@ -120,10 +120,8 @@ struct tensor_view
        return m_data[m_shape.index(this->size() - 1)];
    }

-    // cppcheck-suppress functionConst
    iterator begin() { return {0, {this}}; }

-    // cppcheck-suppress functionConst
    iterator end() { return {this->size(), {this}}; }

    const_iterator begin() const { return {0, {this}}; }

--- a/src/include/migraphx/verify.hpp
+++ b/src/include/migraphx/verify.hpp
@@ -168,7 +168,6 @@ bool verify_range(const R1& r1, const R2& r2, double tolerance = 80, double* out
 {
    double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance;
    auto error       = rms_range(r1, r2);
-    // cppcheck-suppress uninitvar
    if(out_error != nullptr)
        *out_error = error;
    return error <= threshold;

--- a/src/module.cpp
+++ b/src/module.cpp
@@ -729,7 +729,6 @@ std::unordered_map<instruction_ref, std::string>
 module::print_cpp(std::ostream& os, std::unordered_map<instruction_ref, std::string> names) const
 {
    os << "migraphx::module p;" << std::endl;
-    // cppcheck-suppress variableScope
    unsigned long seed = 0;
    names              = this->print(
        [&](auto ins, auto ins_names) {

--- a/src/onnx/parse_mean.cpp
+++ b/src/onnx/parse_mean.cpp
@@ -2,6 +2,7 @@
 #include <migraphx/onnx/checks.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/ranges.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -9,6 +10,9 @@ namespace onnx {

 struct parse_mean : op_parser<parse_mean>
 {
+    const std::set<shape::type_t> float_types = {
+        shape::float_type, shape::half_type, shape::double_type};
+
    std::vector<op_desc> operators() const { return {{"Mean"}}; }

    /// Calculates the element-wise mean of n>=1 input tensors
@@ -24,17 +28,29 @@ struct parse_mean : op_parser<parse_mean>
        auto divisor = info.add_literal(
            migraphx::literal{migraphx::shape{args[0]->get_shape().type()}, {num_data}});

-        // TODO: Only divide when using floating-point
-        return std::accumulate(args.begin() + 1,
-                               args.end(),
-                               info.add_broadcastable_binary_op("div", args[0], divisor),
-                               [&](auto mean, auto data_i) {
-                                   // Pre-divide each tensor element-wise by n to reduce risk of
-                                   // overflow during summation
-                                   auto div =
-                                       info.add_broadcastable_binary_op("div", data_i, divisor);
-                                   return info.add_broadcastable_binary_op("add", mean, div);
-                               });
+        if(contains(float_types, args[0]->get_shape().type()))
+        {
+            return std::accumulate(args.begin() + 1,
+                                   args.end(),
+                                   info.add_broadcastable_binary_op("div", args[0], divisor),
+                                   [&](auto mean, auto data_i) {
+                                       // Pre-divide each tensor element-wise by n to reduce risk of
+                                       // overflow during summation
+                                       auto div =
+                                           info.add_broadcastable_binary_op("div", data_i, divisor);
+                                       return info.add_broadcastable_binary_op("add", mean, div);
+                                   });
+        }
+        else
+        {
+            // Compute sum before division for integral types
+            auto sum = std::accumulate(
+                args.begin() + 1, args.end(), args[0], [&](auto accum, auto data_i) {
+                    return info.add_broadcastable_binary_op("add", accum, data_i);
+                });
+
+            return info.add_broadcastable_binary_op("div", sum, divisor);
+        }
    }
 };


--- a/src/onnx/parse_pooling.cpp
+++ b/src/onnx/parse_pooling.cpp
@@ -128,7 +128,7 @@ struct parse_pooling : op_parser<parse_pooling>
            std::fill_n(values["stride"].begin(), kdims, 1);
        }
        // used to calculate the supposed output shape
-        std::vector<int64_t> orig_padding(paddings.begin(), paddings.end());
+        std::vector<int64_t> orig_padding = paddings;

        std::vector<int64_t> slice_start;
        std::vector<int64_t> slice_end;

--- a/src/onnx/parse_squeeze.cpp
+++ b/src/onnx/parse_squeeze.cpp
@@ -30,11 +30,11 @@ struct parse_squeeze : op_parser<parse_squeeze>
                          std::vector<instruction_ref> args) const
    {
        auto op = parser.load(opd.op_name, info);
-        std::vector<int64_t> axes;
        if(args.size() == 2)
        {
            auto arg_axes = args.at(1)->eval();
            check_arg_empty(arg_axes, "PARSE_" + opd.op_name + ": cannot handle variable axes!");
+            std::vector<int64_t> axes;
            arg_axes.visit([&](auto s) { axes.assign(s.begin(), s.end()); });
            op = assign_axes(op, axes);
        }

--- a/src/process.cpp
+++ b/src/process.cpp
@@ -20,7 +20,6 @@ int exec(const std::string& cmd, const std::function<void(const char*)>& std_out
    int ec = 0;
    if(enabled(MIGRAPHX_TRACE_CMD_EXECUTE{}))
        std::cout << cmd << std::endl;
-    std::array<char, 128> buffer;
    auto closer = [&](FILE* stream) {
        auto status = pclose(stream);
        ec          = WIFEXITED(status) ? 0 : WEXITSTATUS(status); // NOLINT
@@ -30,6 +29,7 @@ int exec(const std::string& cmd, const std::function<void(const char*)>& std_out
        std::unique_ptr<FILE, decltype(closer)> pipe(popen(cmd.c_str(), "r"), closer); // NOLINT
        if(!pipe)
            MIGRAPHX_THROW("popen() failed: " + cmd);
+        std::array<char, 128> buffer;
        while(fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr)
            std_out(buffer.data());
    }

--- a/src/propagate_constant.cpp
+++ b/src/propagate_constant.cpp
@@ -3,6 +3,7 @@
 #include <migraphx/matcher.hpp>
 #include <migraphx/literal.hpp>
 #include <migraphx/functional.hpp>
+#include <migraphx/par_for.hpp>
 #include <unordered_set>

 namespace migraphx {
@@ -20,33 +21,42 @@ bool skip_propogate(instruction_ref ins)
    return false;
 }

+bool is_const(instruction_ref ins) { return ins->can_eval() and not skip_propogate(ins); } // true when ins can be evaluated and skip_propogate() does not exclude it
+
 void propagate_constant::apply(module& m) const
 {
+    std::unordered_set<instruction_ref> const_instrs;
+    auto last = std::prev(m.end());
+
+    // Find instructions that can be evaluated to a literal: the constant, non-@literal inputs of each visited instruction
    for(auto i : iterator_for(m))
    {
-        if(i->name() != "@literal")
+        if(is_const(i) and i != last) // constant instructions (except the last one) are only picked up through their users below
            continue;
-        if(i->outputs().empty())
-            continue;
-        fix([&](auto self, auto ins) {
-            std::unordered_set<instruction_ref> children(ins->outputs().begin(),
-                                                         ins->outputs().end());
-            for(auto child : children)
-            {
-                if(child->name() == "@literal" or skip_propogate(child))
-                {
-                    self(child);
-                    continue;
-                }
-                auto r = child->eval();
-                if(not r.empty())
-                {
-                    assert(r.get_shape() == child->get_shape());
-                    auto l = m.add_literal(r.get_shape(), r.data());
-                    self(m.replace_instruction(child, l));
-                }
-            }
-        })(i);
+
+        std::copy_if(
+            i->inputs().begin(),
+            i->inputs().end(),
+            std::inserter(const_instrs, const_instrs.begin()), // the set keeps one entry per input even when several users share it
+            [&](const instruction_ref ins) { return is_const(ins) and ins->name() != "@literal"; });
+    }
+
+    // Compute literals in parallel; iteration i writes only literals[i]
+    std::vector<instruction_ref> const_instrs_vec{const_instrs.begin(), const_instrs.end()}; // indexable copy so results pair up with instructions by position
+    std::vector<argument> literals(const_instrs_vec.size());
+    par_for(const_instrs_vec.size(), 1, [&](const auto i) { // NOTE(review): presumably invokes the lambda once per index in [0, size) - confirm against par_for.hpp
+        literals[i] = const_instrs_vec[i]->eval();
+    });
+
+    // Replace instructions in m with the literals computed above
+    for(size_t i = 0; i < const_instrs_vec.size(); i++)
+    {
+        if(not literals[i].empty()) // an empty evaluation result leaves the instruction untouched
+        {
+            assert(literals[i].get_shape() == const_instrs_vec[i]->get_shape());
+            auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
+            m.replace_instruction(const_instrs_vec[i], l);
+        }
    }
 }