Commit 5a1af3d1 authored by Paul's avatar Paul
Browse files

Merge

parents dfc7bbac 6e94e607
......@@ -203,6 +203,8 @@ rocm_enable_cppcheck(
useSmartPointer:*make_shared_array.hpp
constParameter:*src/targets/gpu/*.cpp
constParameter:*src/targets/gpu/*.hpp
# Suppress mlir_conv.cpp since this file will be deleted
*:*src/targets/gpu/mlir_conv.cpp
FORCE
INCONCLUSIVE
RULE_FILE
......
......@@ -2,6 +2,6 @@ pfultz2/rocm-recipes
facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
ccache@v4.1
pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
danmar/cppcheck@2.6 -DHAVE_RULES=1
danmar/cppcheck@2.8 -DHAVE_RULES=1
RadeonOpenCompute/rocm-cmake@1ebf7e7bc61bb5e949c171562b421264065230a7 --build
-f requirements.txt
......@@ -62,7 +62,7 @@
"metadata": {},
"outputs": [],
"source": [
"!wget -nc https://github.com/onnx/models/blob/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx"
"!wget -nc https://github.com/onnx/models/raw/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx"
]
},
{
......
......@@ -23,7 +23,7 @@ unzip uncased_L-12_H-768_A-12.zip
```
5) Get BERT ONNX model (bertsquad-10.onnx):
```
wget https://github.com/onnx/models/blob/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx
wget https://github.com/onnx/models/raw/main/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx
```
6) Run the inference; it will compile the model and run it on three questions and the small data set provided in `inputs.json`:
```
......
tensorflow==2.5.3
tensorflow==2.7.2
onnxruntime
tokenizers
\ No newline at end of file
......@@ -24,16 +24,16 @@
"import os.path\n",
"\n",
"if not os.path.exists(\"./utilities/coco.names\"):\n",
" !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/dependencies/coco.names -P ./utilities/\n",
" !wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/dependencies/coco.names -P ./utilities/\n",
"if not os.path.exists(\"./utilities/yolov4_anchors.txt\"):\n",
" !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt -P ./utilities/\n",
" !wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt -P ./utilities/\n",
"if not os.path.exists(\"./utilities/input.jpg\"):\n",
" # The image used is from the COCO dataset (https://cocodataset.org/#explore)\n",
" # Other images can be tested by replacing the link below\n",
" image_link = \"https://farm3.staticflickr.com/2009/2306189268_88cc86b30f_z.jpg\"\n",
" !wget -O ./utilities/input.jpg $image_link\n",
"if not os.path.exists(\"./utilities/yolov4.onnx\"):\n",
" !wget https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/yolov4/model/yolov4.onnx -P ./utilities/"
" !wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/model/yolov4.onnx -P ./utilities/"
]
},
{
......
......@@ -39,10 +39,7 @@ template <class T, class F, class... Ts>
T* make(F f, Ts&&... xs)
{
T* result = nullptr;
// cppcheck-suppress redundantInitialization
// cppcheck-suppress redundantAssignment
// cppcheck-suppress unreadVariable
auto e = f(&result, std::forward<Ts>(xs)...);
auto e = f(&result, std::forward<Ts>(xs)...);
if(e != migraphx_status_success)
throw std::runtime_error("Failed to call function");
return result;
......@@ -51,9 +48,6 @@ T* make(F f, Ts&&... xs)
template <class F, class... Ts>
void call(F f, Ts&&... xs)
{
// cppcheck-suppress redundantInitialization
// cppcheck-suppress redundantAssignment
// cppcheck-suppress unreadVariable
auto e = f(std::forward<Ts>(xs)...);
if(e != migraphx_status_success)
throw std::runtime_error("Failed to call function");
......@@ -340,7 +334,6 @@ struct interface_base : Base
template <class T, class Setter, class F>
void set_auto_fp(Setter setter, F f)
{
// cppcheck-suppress constParameter
return set_fp<T>(setter, [=](T& obj, auto out, auto... xs) {
auto_invoke(f, out, obj, auto_convert_param(rank<2>{}, xs)...);
});
......
......@@ -29,7 +29,6 @@ void argument::assign_buffer(std::function<char*()> d)
// Collect all shapes
std::unordered_map<std::size_t, shape> shapes;
{
// cppcheck-suppress variableScope
std::size_t i = 0;
fix([&](auto self, auto ss) {
if(ss.sub_shapes().empty())
......@@ -60,7 +59,6 @@ void argument::assign_buffer(std::function<char*()> d)
}
assert(offset == s.bytes());
// cppcheck-suppress variableScope
std::size_t i = 0;
m_data = fix<data_t>([&](auto self, auto ss) {
data_t result;
......
......@@ -6,6 +6,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/op/contiguous.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/par_for.hpp>
#include <utility>
namespace migraphx {
......@@ -73,6 +74,8 @@ template <class F>
static void remove_contiguous(const std::string& op_name, module& m, F f)
{
auto last = std::prev(m.end());
std::vector<instruction_ref> const_instruction;
for(auto ins : iterator_for(m))
{
// return instruction should have inputs with standard shape
......@@ -89,6 +92,7 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
auto args = ins->inputs();
auto new_args = args;
auto mod_args = ins->module_inputs();
for(auto arg : ins->inputs())
{
if(arg->name() != op_name)
......@@ -101,14 +105,33 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
}
else if(prev->can_eval())
{
auto c = op::contiguous{};
auto r = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
auto l = m.add_literal(r.get_shape(), r.data());
m.replace_instruction(arg, l);
auto prev = arg->inputs().front();
replace(new_args, arg, prev);
if(try_compute_shape(ins, new_args, mod_args))
{
instruction::replace_argument(ins, arg, prev);
}
else if(prev->can_eval())
{
const_instruction.push_back(arg);
}
}
}
}
// Perform evaluations in parallel
std::vector<argument> literals(const_instruction.size());
par_for(const_instruction.size(), 1, [&](const auto i) {
auto c = op::contiguous{};
auto prev = const_instruction[i]->inputs().front();
literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
});
for(size_t i = 0; i < const_instruction.size(); i++)
{
auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
m.replace_instruction(const_instruction[i], l);
}
}
void eliminate_contiguous::apply(module& m) const
......
......@@ -754,10 +754,16 @@ auto skip_broadcasts(Ms... ms)
return skip(name("broadcast", "multibroadcast", "contiguous"))(ms...);
}
/// Applies the given sub-matchers through a `skip` matcher built over the
/// instruction names "broadcast", "multibroadcast", "contiguous" and "convert".
/// NOTE(review): presumably this lets the sub-matchers look through those
/// shape/type-adjusting instructions -- confirm against the definition of `skip`.
template <class... Ms>
auto skip_broadcasts_converts(Ms... ms)
{
    auto skip_passthrough = skip(name("broadcast", "multibroadcast", "contiguous", "convert"));
    return skip_passthrough(ms...);
}
template <class T>
inline auto has_value(T x, float tolerance = 1e-6)
{
return skip_broadcasts(make_basic_pred_matcher([=](instruction_ref ins) {
return skip_broadcasts_converts(make_basic_pred_matcher([=](instruction_ref ins) {
if(ins->name() != "@literal")
return false;
auto l = ins->get_literal();
......
......@@ -207,8 +207,7 @@ auto visit_all_pack(const shape& s, V1&& v1)
template <class T, class... Ts>
auto visit_all(T&& x, Ts&&... xs)
{
auto&& s = x.get_shape();
// cppcheck-suppress redundantInitialization
auto&& s = x.get_shape();
std::initializer_list<shape::type_t> types = {xs.get_shape().type()...};
if(!std::all_of(types.begin(), types.end(), [&](shape::type_t t) { return t == s.type(); }))
MIGRAPHX_THROW("Types must be the same");
......
......@@ -50,7 +50,6 @@ auto to_value_impl(rank<2>, const T& x) -> decltype(x.begin(), x.end(), value{})
value result = value::array{};
for(auto&& y : x)
{
auto e = to_value(y);
result.insert(to_value(y));
}
return result;
......
......@@ -120,10 +120,8 @@ struct tensor_view
return m_data[m_shape.index(this->size() - 1)];
}
// cppcheck-suppress functionConst
iterator begin() { return {0, {this}}; }
// cppcheck-suppress functionConst
iterator end() { return {this->size(), {this}}; }
const_iterator begin() const { return {0, {this}}; }
......
......@@ -168,7 +168,6 @@ bool verify_range(const R1& r1, const R2& r2, double tolerance = 80, double* out
{
double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance;
auto error = rms_range(r1, r2);
// cppcheck-suppress uninitvar
if(out_error != nullptr)
*out_error = error;
return error <= threshold;
......
......@@ -729,7 +729,6 @@ std::unordered_map<instruction_ref, std::string>
module::print_cpp(std::ostream& os, std::unordered_map<instruction_ref, std::string> names) const
{
os << "migraphx::module p;" << std::endl;
// cppcheck-suppress variableScope
unsigned long seed = 0;
names = this->print(
[&](auto ins, auto ins_names) {
......
......@@ -2,6 +2,7 @@
#include <migraphx/onnx/checks.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/ranges.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -9,6 +10,9 @@ namespace onnx {
struct parse_mean : op_parser<parse_mean>
{
const std::set<shape::type_t> float_types = {
shape::float_type, shape::half_type, shape::double_type};
std::vector<op_desc> operators() const { return {{"Mean"}}; }
/// Calculates the element-wise mean of n>=1 input tensors
......@@ -24,17 +28,29 @@ struct parse_mean : op_parser<parse_mean>
auto divisor = info.add_literal(
migraphx::literal{migraphx::shape{args[0]->get_shape().type()}, {num_data}});
// TODO: Only divide when using floating-point
return std::accumulate(args.begin() + 1,
args.end(),
info.add_broadcastable_binary_op("div", args[0], divisor),
[&](auto mean, auto data_i) {
// Pre-divide each tensor element-wise by n to reduce risk of
// overflow during summation
auto div =
info.add_broadcastable_binary_op("div", data_i, divisor);
return info.add_broadcastable_binary_op("add", mean, div);
});
if(contains(float_types, args[0]->get_shape().type()))
{
return std::accumulate(args.begin() + 1,
args.end(),
info.add_broadcastable_binary_op("div", args[0], divisor),
[&](auto mean, auto data_i) {
// Pre-divide each tensor element-wise by n to reduce risk of
// overflow during summation
auto div =
info.add_broadcastable_binary_op("div", data_i, divisor);
return info.add_broadcastable_binary_op("add", mean, div);
});
}
else
{
// Compute sum before division for integral types
auto sum = std::accumulate(
args.begin() + 1, args.end(), args[0], [&](auto accum, auto data_i) {
return info.add_broadcastable_binary_op("add", accum, data_i);
});
return info.add_broadcastable_binary_op("div", sum, divisor);
}
}
};
......
......@@ -128,7 +128,7 @@ struct parse_pooling : op_parser<parse_pooling>
std::fill_n(values["stride"].begin(), kdims, 1);
}
// used to calculate the supposed output shape
std::vector<int64_t> orig_padding(paddings.begin(), paddings.end());
std::vector<int64_t> orig_padding = paddings;
std::vector<int64_t> slice_start;
std::vector<int64_t> slice_end;
......
......@@ -30,11 +30,11 @@ struct parse_squeeze : op_parser<parse_squeeze>
std::vector<instruction_ref> args) const
{
auto op = parser.load(opd.op_name, info);
std::vector<int64_t> axes;
if(args.size() == 2)
{
auto arg_axes = args.at(1)->eval();
check_arg_empty(arg_axes, "PARSE_" + opd.op_name + ": cannot handle variable axes!");
std::vector<int64_t> axes;
arg_axes.visit([&](auto s) { axes.assign(s.begin(), s.end()); });
op = assign_axes(op, axes);
}
......
......@@ -20,7 +20,6 @@ int exec(const std::string& cmd, const std::function<void(const char*)>& std_out
int ec = 0;
if(enabled(MIGRAPHX_TRACE_CMD_EXECUTE{}))
std::cout << cmd << std::endl;
std::array<char, 128> buffer;
auto closer = [&](FILE* stream) {
auto status = pclose(stream);
ec = WIFEXITED(status) ? 0 : WEXITSTATUS(status); // NOLINT
......@@ -30,6 +29,7 @@ int exec(const std::string& cmd, const std::function<void(const char*)>& std_out
std::unique_ptr<FILE, decltype(closer)> pipe(popen(cmd.c_str(), "r"), closer); // NOLINT
if(!pipe)
MIGRAPHX_THROW("popen() failed: " + cmd);
std::array<char, 128> buffer;
while(fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr)
std_out(buffer.data());
}
......
......@@ -3,6 +3,7 @@
#include <migraphx/matcher.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/par_for.hpp>
#include <unordered_set>
namespace migraphx {
......@@ -20,33 +21,42 @@ bool skip_propogate(instruction_ref ins)
return false;
}
/// Returns true when `ins` can be evaluated (`can_eval()`) and is not excluded
/// by `skip_propogate`; false otherwise.
bool is_const(instruction_ref ins)
{
    // Same short-circuit order as a single `and` expression: the skip check
    // only runs for instructions that can be evaluated.
    if(not ins->can_eval())
        return false;
    return not skip_propogate(ins);
}
/// Replaces constant sub-expressions of module `m` with literals.
///
/// The pass runs in three phases:
///  1. Collect every non-literal input for which `is_const` holds, looking only
///     at consumers that are themselves not constant (or are the last
///     instruction of the module).
///  2. Evaluate all collected instructions through `par_for`.
///  3. Replace each instruction whose evaluation produced a non-empty result
///     with a literal added to `m`.
///
/// The module is only modified in phase 3, after iteration and evaluation have
/// finished.
void propagate_constant::apply(module& m) const
{
    std::unordered_set<instruction_ref> const_instrs;
    auto last = std::prev(m.end());

    // Find instructions that can be evaluated to a literal
    for(auto i : iterator_for(m))
    {
        // A constant instruction (other than the last one) is not inspected as a
        // consumer; it is picked up below as an input of a non-constant consumer,
        // so its own constant inputs are not collected separately.
        if(is_const(i) and i != last)
            continue;

        // The set de-duplicates inputs shared by several consumers. Existing
        // literals are left alone.
        std::copy_if(
            i->inputs().begin(),
            i->inputs().end(),
            std::inserter(const_instrs, const_instrs.begin()),
            [&](const instruction_ref ins) { return is_const(ins) and ins->name() != "@literal"; });
    }

    // Compute literals in parallel
    // Copy into a vector so results can be matched to instructions by index.
    std::vector<instruction_ref> const_instrs_vec{const_instrs.begin(), const_instrs.end()};
    std::vector<argument> literals(const_instrs_vec.size());
    par_for(const_instrs_vec.size(), 1, [&](const auto i) {
        literals[i] = const_instrs_vec[i]->eval();
    });

    // Replace instructions in m
    for(size_t i = 0; i < const_instrs_vec.size(); i++)
    {
        // An empty result means the instruction could not be evaluated; keep it.
        if(not literals[i].empty())
        {
            assert(literals[i].get_shape() == const_instrs_vec[i]->get_shape());
            auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
            m.replace_instruction(const_instrs_vec[i], l);
        }
    }
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment