Commit 038a4c52 authored by wsttiger

Merged from master; still debugging resnet.

parents 06cc4f8f 905d4ab0
@@ -6,6 +6,7 @@
 #include <unordered_map>
 #include <functional>
 #include <array>
+#include <utility>
 #include <vector>
 #include <migraph/fallthrough.hpp>
@@ -27,7 +28,7 @@ struct unknown
         else
             return input.front();
     }
-    argument compute(context&, shape, std::vector<argument>) const
+    argument compute(context&, const shape&, const std::vector<argument>&) const
     {
         MIGRAPH_THROW("not computable");
     }
@@ -103,7 +104,7 @@ struct onnx_parser
     }

     instruction_ref
-    parse_conv(std::string, attribute_map attributes, std::vector<instruction_ref> args)
+    parse_conv(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         convolution op;
         if(contains(attributes, "pads"))
@@ -128,8 +129,9 @@ struct onnx_parser
         return prog.add_instruction(op, args);
     }

-    instruction_ref
-    parse_pooling(std::string name, attribute_map attributes, std::vector<instruction_ref> args)
+    instruction_ref parse_pooling(const std::string& name,
+                                  attribute_map attributes,
+                                  std::vector<instruction_ref> args)
     {
         pooling op{name == "MaxPool" ? "max" : "average"};
         if(contains(attributes, "pads"))
@@ -144,30 +146,11 @@ struct onnx_parser
         {
             copy(attributes["kernel_shape"].ints(), op.lengths.begin());
         }
-        return prog.add_instruction(op, args);
+        return prog.add_instruction(op, std::move(args));
     }

     instruction_ref
-    parse_average_pooling(std::string, attribute_map attributes, std::vector<instruction_ref> args)
-    {
-        pooling op{"average"};
-        if(contains(attributes, "pads"))
-        {
-            copy(attributes["pads"].ints(), op.padding.begin());
-        }
-        if(contains(attributes, "strides"))
-        {
-            copy(attributes["strides"].ints(), op.stride.begin());
-        }
-        if(contains(attributes, "kernel_shape"))
-        {
-            copy(attributes["kernel_shape"].ints(), op.lengths.begin());
-        }
-        return prog.add_instruction(op, args);
-    }
-
-    instruction_ref
-    parse_reshape(std::string, attribute_map attributes, std::vector<instruction_ref> args)
+    parse_reshape(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         reshape op;
         if(args.size() == 1)
@@ -184,7 +167,7 @@ struct onnx_parser
     }

     instruction_ref
-    parse_flatten(std::string, attribute_map attributes, std::vector<instruction_ref> args)
+    parse_flatten(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         uint64_t axis = 0;
         if(contains(attributes, "axis"))
@@ -194,15 +177,16 @@ struct onnx_parser
         return prog.add_instruction(flatten{axis}, args[0]);
     }

-    instruction_ref
-    parse_constant(std::string, attribute_map attributes, std::vector<instruction_ref>)
+    instruction_ref parse_constant(const std::string&,
+                                   attribute_map attributes,
+                                   const std::vector<instruction_ref>&)
     {
         literal v = parse_value(attributes.at("value"));
         return prog.add_literal(v);
     }

     instruction_ref
-    parse_gemm(std::string, attribute_map attributes, std::vector<instruction_ref> args)
+    parse_gemm(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         float alpha = 1.0f;
         float beta  = 0.0f;
@@ -238,7 +222,7 @@ struct onnx_parser
     }

     instruction_ref
-    parse_batchnorm(std::string, attribute_map attributes, std::vector<instruction_ref> args)
+    parse_batchnorm(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         float epsilon  = 1e-5f;
         float momentum = 0.9f;
@@ -263,7 +247,7 @@ struct onnx_parser
                           : batch_norm_inference::per_activation;
         }
         batch_norm_inference op{epsilon, momentum, bn_mode, is_test};
-        return prog.add_instruction(op, args);
+        return prog.add_instruction(op, std::move(args));
     }

     void parse_from(std::istream& is)
@@ -312,7 +296,7 @@ struct onnx_parser
         }
     }

-    void parse_node(std::string name)
+    void parse_node(const std::string& name)
     {
         if(name.empty())
             MIGRAPH_THROW("Onnx node must have a name");
...
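The pattern across these parser changes is uniform: read-only string parameters become const references, and argument vectors that the callee consumes are handed to prog.add_instruction with std::move. A minimal standalone sketch of the two conventions (the names below are illustrative, not from the repo):

#include <string>
#include <utility>
#include <vector>

// Illustrative only: a callee that merely reads `name` takes const&,
// while a vector it stores or forwards is taken by value and moved.
void consume(const std::string& name, std::vector<int> args)
{
    static std::vector<int> stored;
    (void)name;
    stored = std::move(args); // no copy: the buffer is stolen from `args`
}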
#include <migraph/onnx.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/generate.hpp>
#include <migraph/verify.hpp>
migraph::program::parameter_map create_param_map(const migraph::program& p, bool gpu = true)
{
    migraph::program::parameter_map m;
    for(auto&& x : p.get_parameter_shapes())
    {
        if(gpu)
            m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
        else
            m[x.first] = migraph::generate_argument(x.second);
    }
    return m;
}

int main(int argc, char const* argv[])
{
    if(argc > 1)
    {
        std::string file = argv[1];
        std::size_t n    = argc > 2 ? std::stoul(argv[2]) : 50;
        auto p           = migraph::parse_onnx(file);
        std::cout << "Compiling ... " << std::endl;
        p.compile(migraph::gpu::target{});
        std::cout << "Allocating params ... " << std::endl;
        auto m = create_param_map(p);
        std::cout << "Running performance report ... " << std::endl;
        p.perf_report(std::cout, n, m);
    }
}
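The gpu flag on create_param_map also allows a host-side run of the same model. A sketch under the assumption that the CPU target header is included ("model.onnx" is a placeholder path):

// Host-side variant: gpu=false yields plain host arguments, so the program
// can be evaluated without copying anything to the device.
auto p = migraph::parse_onnx("model.onnx");
p.compile(migraph::cpu::cpu_target{});
auto m      = create_param_map(p, false);
auto result = p.eval(m);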
@@ -61,52 +61,49 @@ int main(int argc, char const* argv[])
     std::cout << prog << std::endl;
     auto imageset = read_cifar10_images(datafile);

-    // GPU target
-    prog.compile(migraph::gpu::target{});
-    migraph::program::parameter_map m;
-    auto s = migraph::shape{migraph::shape::float_type, {1, 3, 32, 32}};
-    m["output"] =
-        migraph::gpu::to_gpu(migraph::generate_argument(prog.get_parameter_shape("output")));
-    auto labels = imageset.first;
-    auto input  = imageset.second;
-    auto ptr    = input.data();
-    for(int i = 0; i < 10; i++)
-    {
-        std::cout << "label: " << (uint32_t)labels[i] << " ----> ";
-        m["0"]      = migraph::gpu::to_gpu(migraph::argument{s, &ptr[3072 * i]});
-        auto result = migraph::gpu::from_gpu(prog.eval(m));
-        std::vector<float> logits;
-        result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
-        std::vector<float> probs = softmax(logits);
-        for(auto x : probs)
-            std::cout << x << " ";
-        std::cout << std::endl;
-        std::cout << std::endl;
-        for (int j = 0; j < 10; j++) {
-            std::cout << 255.0*input[i*3072+j] << " ";
-        }
-        std::cout << std::endl;
-        std::cout << std::endl;
-        std::cout << std::endl;
-    }
-
-    // // CPU target
-    // prog.compile(migraph::cpu::cpu_target{});
+    // // GPU target
+    // prog.compile(migraph::gpu::target{});
+    // migraph::program::parameter_map m;
     // auto s = migraph::shape{migraph::shape::float_type, {1, 3, 32, 32}};
+    // for(auto&& x : prog.get_parameter_shapes())
+    // {
+    //     m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
+    // }
     // auto labels = imageset.first;
     // auto input  = imageset.second;
     // auto ptr    = input.data();
     // for(int i = 0; i < 10; i++)
     // {
     //     std::cout << "label: " << (uint32_t)labels[i] << " ----> ";
-    //     auto input3 = migraph::argument{s, &ptr[3072 * i]};
-    //     auto result = prog.eval({{"0", input3}});
+    //     m["0"]      = migraph::gpu::to_gpu(migraph::argument{s, &ptr[3072 * i]});
+    //     auto result = migraph::gpu::from_gpu(prog.eval(m));
     //     std::vector<float> logits;
     //     result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
     //     std::vector<float> probs = softmax(logits);
     //     for(auto x : logits)
-    //         std::cout << x << " ";
+    //         //std::cout << x << " ";
+    //         //std::cout << x << std::endl;
+    //         printf("%10.5e ", x);
     //     std::cout << std::endl;
     // }
+
+    // CPU target
+    prog.compile(migraph::cpu::cpu_target{});
+    auto s      = migraph::shape{migraph::shape::float_type, {1, 3, 32, 32}};
+    auto labels = imageset.first;
+    auto input  = imageset.second;
+    auto ptr    = input.data();
+    for(int i = 0; i < 10; i++)
+    {
+        std::cout << "label: " << (uint32_t)labels[i] << " ----> ";
+        auto input3 = migraph::argument{s, &ptr[3072 * i]};
+        auto result = prog.eval({{"0", input3}});
+        std::vector<float> logits;
+        result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
+        std::vector<float> probs = softmax(logits);
+        for(auto x : logits)
+            printf("%10.5e ", x);
+        std::cout << std::endl;
+    }
 }
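The softmax helper called above is defined elsewhere in the repo; presumably it is the usual exp-normalize over the logits. A minimal, numerically stable sketch consistent with how it is used here:

#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> softmax(const std::vector<float>& logits)
{
    std::vector<float> probs(logits.size());
    float m   = *std::max_element(logits.begin(), logits.end());
    float sum = 0.0f;
    for(std::size_t i = 0; i < logits.size(); i++)
    {
        probs[i] = std::exp(logits[i] - m); // subtract the max to avoid overflow
        sum += probs[i];
    }
    for(auto& p : probs)
        p /= sum;
    return probs;
}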
@@ -7,7 +7,7 @@
 #include <migraph/generate.hpp>
 #include <migraph/verify.hpp>

-migraph::argument run_cpu(std::string file)
+migraph::argument run_cpu(const std::string& file)
 {
     auto p = migraph::parse_onnx(file);
     p.compile(migraph::cpu::cpu_target{});
@@ -21,7 +21,7 @@ migraph::argument run_cpu(std::string file)
     return out;
 }

-migraph::argument run_gpu(std::string file)
+migraph::argument run_gpu(const std::string& file)
 {
     auto p = migraph::parse_onnx(file);
     p.compile(migraph::gpu::target{});
...
@@ -2,9 +2,12 @@
 #include <migraph/stringutils.hpp>
 #include <migraph/instruction.hpp>
 #include <migraph/env.hpp>
+#include <migraph/time.hpp>
+#include <migraph/iterator_for.hpp>
 #include <iostream>
 #include <sstream>
 #include <algorithm>
+#include <utility>

 namespace migraph {
@@ -19,18 +22,68 @@ struct program_impl

 const operation& get_operation(instruction_ref ins) { return ins->op; }

+template <class F>
+static void print_program(std::ostream& os, const program& p, F annonate)
+{
+    std::unordered_map<instruction_ref, std::string> names;
+    int count = 0;
+
+    for(auto ins : iterator_for(p))
+    {
+        std::string var_name = "@" + std::to_string(count);
+        if(ins->op.name() == "@param")
+        {
+            var_name = any_cast<builtin::param>(ins->op).parameter;
+        }
+
+        os << var_name << " = ";
+
+        os << ins->op;
+
+        if(ins->op.name() == "@literal")
+        {
+            if(ins->lit.get_shape().elements() > 10)
+                os << "{ ... }";
+            else
+                os << "{" << ins->lit << "}";
+        }
+
+        if(!ins->arguments.empty())
+        {
+            char delim = '(';
+            for(auto&& arg : ins->arguments)
+            {
+                assert(p.has_instruction(arg) && "Instruction not found");
+                os << delim << names.at(arg);
+                delim = ',';
+            }
+            os << ")";
+        }
+
+        os << " -> " << ins->result;
+        annonate(ins, names);
+        os << std::endl;
+
+        names.emplace(ins, var_name);
+        count++;
+    }
+}
+
 program::program() : impl(std::make_unique<program_impl>()) {}

 program::program(program&&) noexcept = default;
 program& program::operator=(program&&) noexcept = default;
 program::~program() noexcept = default;

-instruction_ref program::add_instruction(operation op, std::vector<instruction_ref> args)
+instruction_ref program::add_instruction(const operation& op, std::vector<instruction_ref> args)
 {
-    return insert_instruction(impl->instructions.end(), std::move(op), std::move(args));
+    return insert_instruction(impl->instructions.end(), op, std::move(args));
 }

-instruction_ref
-program::insert_instruction(instruction_ref ins, operation op, std::vector<instruction_ref> args)
+instruction_ref program::insert_instruction(instruction_ref ins,
+                                            const operation& op,
+                                            std::vector<instruction_ref> args)
 {
     assert(std::all_of(
                args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
@@ -45,8 +98,9 @@ program::insert_instruction(instruction_ref ins, operation op, std::vector<instruction_ref> args)
     return result;
 }

-instruction_ref
-program::replace_instruction(instruction_ref ins, operation op, std::vector<instruction_ref> args)
+instruction_ref program::replace_instruction(instruction_ref ins,
+                                             const operation& op,
+                                             std::vector<instruction_ref> args)
 {
     assert(std::all_of(
                args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
@@ -117,7 +171,7 @@ instruction_ref program::add_literal(literal l)
     return impl->instructions.begin();
 }

-instruction_ref program::add_outline(shape s)
+instruction_ref program::add_outline(const shape& s)
 {
     impl->instructions.push_front({builtin::outline{s}, s, {}});
     return impl->instructions.begin();
@@ -125,7 +179,7 @@ instruction_ref program::add_outline(shape s)

 instruction_ref program::add_parameter(std::string name, shape s)
 {
-    impl->instructions.push_front({builtin::param{std::move(name)}, s, {}});
+    impl->instructions.push_front({builtin::param{std::move(name)}, std::move(s), {}});
     return impl->instructions.begin();
 }
@@ -170,6 +224,7 @@ bool program::has_instruction(instruction_ref ins) const
            }) != impl->instructions.end();
 }

+std::size_t program::size() const { return impl->instructions.size(); }
 instruction_ref program::begin() const { return impl->instructions.begin(); }
 instruction_ref program::end() const { return impl->instructions.end(); }
@@ -195,8 +250,10 @@ void program::compile(const target& t)
             std::cout << "Pass: " << p.name() << std::endl;
         p.apply(*this);
         if(enabled(MIGRAPH_TRACE_COMPILE{}))
-            std::cout << *this << std::endl << std::endl;
+            std::cout << *this << std::endl;
 #ifndef NDEBUG
+        if(enabled(MIGRAPH_TRACE_COMPILE{}))
+            std::cout << "Validate ..." << std::endl;
         auto invalid = this->validate();
         if(invalid != impl->instructions.end())
         {
@@ -204,6 +261,8 @@ void program::compile(const target& t)
             MIGRAPH_THROW(p.name() + " pass produces invalid program at instruction " +
                           std::to_string(index) + ": " + invalid->op.name());
         }
+        if(enabled(MIGRAPH_TRACE_COMPILE{}))
+            std::cout << std::endl;
 #endif
     }
     auto invalid = this->validate();
@@ -214,86 +273,132 @@ void program::compile(const target& t)
     }
 }

-argument program::eval(std::unordered_map<std::string, argument> params) const
+template <class F>
+argument generic_eval(const program& p,
+                      context& ctx,
+                      std::unordered_map<std::string, argument> params,
+                      F trace)
 {
-    assert(this->validate() == impl->instructions.end());
-    std::unordered_map<const instruction*, argument> results;
-    argument result;
-    for(auto& ins : impl->instructions)
+    assert(p.validate() == p.end());
+    std::unordered_map<instruction_ref, argument> results;
+    results.reserve(p.size() * 2);
+    std::vector<argument> values;
+    values.reserve(16);
+    for(auto ins : iterator_for(p))
     {
-        if(ins.op.name() == "@literal")
+        if(ins->op.name() == "@literal")
         {
-            result = ins.lit.get_argument();
+            results.emplace(ins, trace(ins, [&] { return ins->lit.get_argument(); }));
         }
-        else if(ins.op.name() == "@param")
+        else if(ins->op.name() == "@param")
         {
-            result = params.at(any_cast<builtin::param>(ins.op).parameter);
+            std::cout << ins->op.name() << std::endl;
+            results.emplace(ins, trace(ins, [&] {
+                                return params.at(any_cast<builtin::param>(ins->op).parameter);
+                            }));
         }
-        else if(ins.op.name() == "@outline")
+        else if(ins->op.name() == "@outline")
         {
-            result = argument{ins.result, nullptr};
+            results.emplace(ins, trace(ins, [&] { return argument{ins->result, nullptr}; }));
         }
         else
         {
-            std::vector<argument> values(ins.arguments.size());
-            std::transform(ins.arguments.begin(),
-                           ins.arguments.end(),
+            values.resize(ins->arguments.size());
+            std::transform(ins->arguments.begin(),
+                           ins->arguments.end(),
                            values.begin(),
-                           [&](instruction_ref i) { return results.at(std::addressof(*i)); });
-            // std::cout << "Compute: " << ins.op.name() << std::endl;
-            result = ins.op.compute(this->impl->ctx, ins.result, values);
+                           [&](instruction_ref i) {
+                               assert(results.find(i) != results.end());
+                               return results[i];
+                           });
+            results.emplace(ins,
+                            trace(ins, [&] { return ins->op.compute(ctx, ins->result, values); }));
         }
-        results.emplace(std::addressof(ins), result);
+        assert(results.find(ins) != results.end());
     }
-    return result;
+    return results.at(std::prev(p.end()));
+}
+
+argument program::eval(std::unordered_map<std::string, argument> params) const
+{
+    return generic_eval(
+        *this, this->impl->ctx, std::move(params), [](auto&, auto f) { return f(); });
+}
+
+double common_average(const std::vector<double>& v)
+{
+    std::size_t n = v.size() / 4;
+    double total  = std::accumulate(v.begin() + n, v.end() - n, 0.0);
+    return total / std::distance(v.begin() + n, v.end() - n);
+}
+
+void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
+{
+    using milliseconds = std::chrono::duration<double, std::milli>;
+    // Run once by itself
+    eval(params);

+    // Run and time entire program
+    std::vector<double> total_vec;
+    total_vec.reserve(n);
+    for(std::size_t i = 0; i < n; i++)
+    {
+        total_vec.push_back(time<milliseconds>([&] { eval(params); }));
+    }
+    std::sort(total_vec.begin(), total_vec.end());
+    std::unordered_map<instruction_ref, std::vector<double>> ins_vec;
+    // Fill the map
+    generic_eval(*this, this->impl->ctx, params, [&](auto ins, auto) {
+        ins_vec[ins].reserve(n);
+        return argument{};
+    });
+    // Run and time each instruction
+    for(std::size_t i = 0; i < n; i++)
+    {
+        generic_eval(*this, this->impl->ctx, params, [&](auto ins, auto f) {
+            argument result;
+            ins_vec[ins].push_back(time<milliseconds>([&] { result = f(); }));
+            return result;
+        });
+    }
+    for(auto&& p : ins_vec)
+        std::sort(p.second.begin(), p.second.end());
+    // Run and time implicit overhead
+    std::vector<double> overhead_vec;
+    overhead_vec.reserve(n);
+    for(std::size_t i = 0; i < n; i++)
+    {
+        overhead_vec.push_back(time<milliseconds>([&] {
+            generic_eval(*this, this->impl->ctx, params, [](auto...) { return argument{}; });
+        }));
+    }
+
+    double total_time             = common_average(total_vec);
+    double rate                   = std::ceil(1000.0 / total_time);
+    double overhead_time          = common_average(overhead_vec);
+    double overhead_percent       = overhead_time * 100.0 / total_time;
+    double total_instruction_time = 0.0;
+    for(auto&& p : ins_vec)
+        total_instruction_time += common_average(p.second);
+    double calculate_overhead_time    = total_time - total_instruction_time;
+    double calculate_overhead_percent = calculate_overhead_time * 100.0 / total_time;
+
+    print_program(
+        os, *this, [&](auto ins, auto&&) { os << ": " << common_average(ins_vec[ins]) << "ms"; });
+    os << "Rate: " << rate << "/sec" << std::endl;
+    os << "Total time: " << total_time << "ms" << std::endl;
+    os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
+    os << "Overhead time: " << overhead_time << "ms"
+       << ", " << calculate_overhead_time << "ms" << std::endl;
+    os << "Overhead: " << std::round(overhead_percent) << "%"
+       << ", " << std::round(calculate_overhead_percent) << "%" << std::endl;
+}

 bool operator==(const program& x, const program& y) { return to_string(x) == to_string(y); }

 std::ostream& operator<<(std::ostream& os, const program& p)
 {
-    std::unordered_map<const instruction*, std::string> names;
-    int count = 0;
-
-    for(auto& ins : p.impl->instructions)
-    {
-        std::string var_name = "@" + std::to_string(count);
-        if(ins.op.name() == "@param")
-        {
-            var_name = any_cast<builtin::param>(ins.op).parameter;
-        }
-
-        os << var_name << " = ";
-
-        os << ins.op;
-
-        if(ins.op.name() == "@literal")
-        {
-            if(ins.lit.get_shape().elements() > 10)
-                os << "{ ... }";
-            else
-                os << "{" << ins.lit << "}";
-        }
-
-        if(!ins.arguments.empty())
-        {
-            char delim = '(';
-            for(auto&& arg : ins.arguments)
-            {
-                assert(p.has_instruction(arg) && "Instruction not found");
-                os << delim << names.at(std::addressof(*arg));
-                delim = ',';
-            }
-            os << ")";
-        }
-
-        os << " -> " << ins.result;
-
-        os << std::endl;
-
-        names.emplace(std::addressof(ins), var_name);
-        count++;
-    }
+    print_program(os, p, [](auto&&...) {});
     return os;
 }
...
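Two design points in this file are worth spelling out. First, generic_eval threads a trace callback through every instruction: eval passes an identity callback, while perf_report passes callbacks that time each compute, or that return an empty argument to measure the walk itself (the "implicit overhead"). Second, common_average is a trimmed mean: after sorting, it drops the fastest and slowest quarter of the samples and averages the middle half, which makes the report robust to warm-up effects and scheduler spikes. A standalone worked example of the trimmed mean:

// With 8 sorted samples, n = 8 / 4 = 2, so the two fastest and two slowest
// runs are dropped and the middle four are averaged: (3 + 3 + 4 + 4) / 4 = 3.5.
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    std::vector<double> v = {1, 2, 3, 3, 4, 4, 9, 20}; // already sorted
    std::size_t n = v.size() / 4;
    double total  = std::accumulate(v.begin() + n, v.end() - n, 0.0);
    std::cout << total / std::distance(v.begin() + n, v.end() - n) << "ms\n"; // 3.5ms
    return 0;
}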
@@ -8,46 +8,90 @@

 namespace migraph {

-shape::shape() : m_type(float_type), m_standard(false) {}
-
-shape::shape(type_t t) : m_type(t), m_lens({1}), m_strides({1}), m_standard(true) {}
-shape::shape(type_t t, std::vector<std::size_t> l)
-    : m_type(t), m_lens(std::move(l)), m_standard(true)
-{
-    this->calculate_strides();
-    assert(m_lens.size() == m_strides.size());
-}
-shape::shape(type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s)
-    : m_type(t), m_lens(std::move(l)), m_strides(std::move(s))
-{
-    assert(m_lens.size() == m_strides.size());
-    assert(std::any_of(m_strides.begin(), m_strides.end(), [](auto x) { return x > 0; }) and
-           "At least one stride must be non-zero");
-    m_standard = this->packed() and not this->transposed();
-}
-
-void shape::calculate_strides()
-{
-    m_strides.clear();
-    m_strides.resize(m_lens.size(), 0);
-    if(m_strides.empty())
-        return;
-    m_strides.back() = 1;
-    std::partial_sum(
-        m_lens.rbegin(), m_lens.rend() - 1, m_strides.rbegin() + 1, std::multiplies<std::size_t>());
-}
-
-shape::type_t shape::type() const { return this->m_type; }
-const std::vector<std::size_t>& shape::lens() const { return this->m_lens; }
-const std::vector<std::size_t>& shape::strides() const { return this->m_strides; }
-std::size_t shape::elements() const
-{
-    assert(this->lens().size() == this->strides().size());
-    if(this->lens().empty())
-        return 0;
-    return std::accumulate(
-        this->lens().begin(), this->lens().end(), std::size_t{1}, std::multiplies<std::size_t>());
-}
+struct shape_impl
+{
+    static std::shared_ptr<shape_impl> default_shape()
+    {
+        static std::shared_ptr<shape_impl> result = std::make_shared<shape_impl>();
+        return result;
+    }
+
+    shape_impl() : m_type(shape::float_type), m_standard(false) {}
+
+    shape_impl(shape::type_t t) : m_type(t), m_lens({1}), m_strides({1}), m_standard(true) {}
+    shape_impl(shape::type_t t, std::vector<std::size_t> l)
+        : m_type(t), m_lens(std::move(l)), m_standard(true)
+    {
+        this->calculate_strides();
+        assert(m_lens.size() == m_strides.size());
+    }
+    shape_impl(shape::type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s)
+        : m_type(t), m_lens(std::move(l)), m_strides(std::move(s))
+    {
+        assert(m_lens.size() == m_strides.size());
+        assert(std::any_of(m_strides.begin(), m_strides.end(), [](auto x) { return x > 0; }) and
+               "At least one stride must be non-zero");
+        m_standard = this->elements() == this->element_space() and
+                     std::is_sorted(m_strides.rbegin(), m_strides.rend());
+    }
+
+    shape::type_t m_type;
+    std::vector<std::size_t> m_lens;
+    std::vector<std::size_t> m_strides;
+    bool m_standard;
+
+    void calculate_strides()
+    {
+        m_strides.clear();
+        m_strides.resize(m_lens.size(), 0);
+        if(m_strides.empty())
+            return;
+        m_strides.back() = 1;
+        std::partial_sum(m_lens.rbegin(),
+                         m_lens.rend() - 1,
+                         m_strides.rbegin() + 1,
+                         std::multiplies<std::size_t>());
+    }
+
+    std::size_t element_space() const
+    {
+        assert(m_lens.size() == m_strides.size());
+        if(m_lens.empty())
+            return 0;
+        return std::inner_product(m_lens.begin(),
+                                  m_lens.end(),
+                                  m_strides.begin(),
+                                  std::size_t{0},
+                                  std::plus<std::size_t>{},
+                                  [](std::size_t l, std::size_t s) { return (l - 1) * s; }) +
+               1;
+    }
+
+    std::size_t elements() const
+    {
+        assert(m_lens.size() == m_strides.size());
+        if(m_lens.empty())
+            return 0;
+        return std::accumulate(
+            m_lens.begin(), m_lens.end(), std::size_t{1}, std::multiplies<std::size_t>());
+    }
+};
+
+shape::shape() : impl(shape_impl::default_shape()) {}
+
+shape::shape(type_t t) : impl(std::make_shared<shape_impl>(t)) {}
+shape::shape(type_t t, std::vector<std::size_t> l)
+    : impl(std::make_shared<shape_impl>(t, std::move(l)))
+{
+}
+shape::shape(type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s)
+    : impl(std::make_shared<shape_impl>(t, std::move(l), std::move(s)))
+{
+}
+
+shape::type_t shape::type() const { return impl->m_type; }
+const std::vector<std::size_t>& shape::lens() const { return impl->m_lens; }
+const std::vector<std::size_t>& shape::strides() const { return impl->m_strides; }
+std::size_t shape::elements() const { return impl->elements(); }

 std::size_t shape::bytes() const
 {
     std::size_t n = 0;
@@ -98,25 +142,13 @@ bool shape::broadcasted() const
                               std::multiplies<std::size_t>()) == 0;
 }

-bool shape::standard() const { return this->m_standard; }
+bool shape::standard() const { return impl->m_standard; }

-std::size_t shape::element_space() const
-{
-    assert(this->lens().size() == this->strides().size());
-    if(this->lens().empty())
-        return 0;
-    return std::inner_product(this->lens().begin(),
-                              this->lens().end(),
-                              this->strides().begin(),
-                              std::size_t{0},
-                              std::plus<std::size_t>{},
-                              [](std::size_t l, std::size_t s) { return (l - 1) * s; }) +
-           1;
-}
+std::size_t shape::element_space() const { return impl->element_space(); }

 std::string shape::type_string() const
 {
-    switch(this->m_type)
+    switch(this->type())
     {
 #define MIGRAPH_SHAPE_TYPE_STRING_CASE(x, t) \
     case x: return #x;
...
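The new standard() definition deserves a note: element_space() is the index of the last addressable element plus one, i.e. 1 + sum over dimensions of (len - 1) * stride, and a shape is standard when that equals elements() (no gaps) and the strides are in descending order (no transpose). A small sketch of the consequences, assuming the shape header is <migraph/shape.hpp>:

#include <cassert>
#include <migraph/shape.hpp>

int main()
{
    migraph::shape packed{migraph::shape::float_type, {2, 3}};       // strides computed as {3, 1}
    migraph::shape view{migraph::shape::float_type, {2, 3}, {1, 2}}; // a transposed view
    assert(packed.elements() == 6 && packed.element_space() == 6);   // (2-1)*3 + (3-1)*1 + 1 = 6
    assert(packed.standard());                                       // packed, strides descending
    assert(view.element_space() == 6);                               // (2-1)*1 + (3-1)*2 + 1 = 6
    assert(!view.standard());                                        // strides {1, 2} not descending
    return 0;
}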
@@ -7,6 +7,7 @@
 #include <migraph/iterator_for.hpp>
 #include <migraph/cpu/gemm.hpp>
 #include <unordered_map>
+#include <utility>

 namespace migraph {
 namespace cpu {
@@ -39,9 +40,9 @@ struct cpu_batch_norm_inference

     std::string name() const { return "cpu::batch_norm_inference"; }

-    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument output{output_shape};
@@ -95,7 +96,7 @@ struct cpu_convolution
     convolution op;

     std::string name() const { return "cpu::convolution"; }
-    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, shape output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
@@ -161,8 +162,8 @@ struct cpu_pooling
     pooling op;

     std::string name() const { return "cpu::pooling_" + Op::name(); }
-    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         visit_all(result, args[0])([&](auto output, auto input) {
@@ -208,8 +209,8 @@ struct cpu_contiguous
 {
     contiguous op;
     std::string name() const { return "cpu::contiguous"; }
-    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         visit_all(result, args[0])([&](auto output, auto input) {
@@ -225,9 +226,9 @@ struct cpu_gemm
 {
     gemm op;
     std::string name() const { return "cpu::gemm"; }
-    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         migemm(result, args[0], args[1], op.alpha, op.beta);
@@ -357,8 +358,8 @@ struct cpu_unary
 {
     Op op;
     std::string name() const { return op.name(); }
-    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         result.visit([&](auto output) {
@@ -373,8 +374,8 @@ struct cpu_unary
 struct softmax2d
 {
     std::string name() const { return "cpu::softmax2d"; }
-    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         visit_all(result, args[0])([&](auto output, auto input) {
@@ -449,8 +450,8 @@ struct cpu_binary
 {
     Op op;
     std::string name() const { return op.name(); }
-    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
...
@@ -18,6 +18,7 @@ target_link_libraries(migraph_device migraph hip::device)
 target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)

 add_library(migraph_gpu
+    eliminate_workspace.cpp
     hip.cpp
     target.cpp
     lowering.cpp
...
#include <migraph/gpu/eliminate_workspace.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/program.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/ranges.hpp>
#include <migraph/stringutils.hpp>

namespace migraph {
namespace gpu {

void eliminate_workspace::apply(program& p) const
{
    std::size_t n = 0;
    std::vector<instruction_ref> allocs;
    for(auto ins : iterator_for(p))
    {
        if(ins->output.size() != 1)
            continue;
        if(ins->op.name() != "hip::allocate")
            continue;
        auto&& a = any_cast<hip_allocate>(ins->op);
        if(a.tag == "workspace")
        {
            n = std::max(n, ins->get_shape().bytes());
            allocs.push_back(ins);
        }
    }
    auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
    for(auto&& a : allocs)
    {
        p.replace_instruction(a, ws);
        p.remove_instruction(a);
    }
}

} // namespace gpu
} // namespace migraph
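This new pass folds every "workspace"-tagged hip::allocate into a single "workspace" parameter sized to the largest request, so all convolutions share one scratch buffer instead of each owning its own. A sketch of invoking the pass in isolation (build_program_with_two_convs is a hypothetical helper; the sizes are illustrative):

// After apply(), both workspace allocations refer to one shared parameter
// whose size is the maximum of the individual workspace requests.
migraph::program p = build_program_with_two_convs(); // hypothetical helper
migraph::gpu::eliminate_workspace{}.apply(p);
auto ws_shape = p.get_parameter_shape("workspace");  // int8_type, {max request}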
@@ -13,12 +13,28 @@ using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);

 std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }

-hip_ptr allocate_gpu(std::size_t sz)
+std::size_t get_available_gpu_memory()
+{
+    size_t free;
+    size_t total;
+    auto status = hipMemGetInfo(&free, &total);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Failed getting available memory: " + hip_error(status));
+    return free;
+}
+
+hip_ptr allocate_gpu(std::size_t sz, bool host = false)
 {
+    if(sz > get_available_gpu_memory())
+        MIGRAPH_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
     void* result;
-    auto status = hipMalloc(&result, sz);
+    auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
     if(status != hipSuccess)
+    {
+        if(host)
             MIGRAPH_THROW("Gpu allocation failed: " + hip_error(status));
+        else
+            return allocate_gpu(sz, true);
+    }
     return hip_ptr{result};
 }
@@ -40,24 +56,24 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
     return result;
 }

-hip_ptr write_to_gpu(const void* x, std::size_t sz)
+hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
 {
-    auto result = allocate_gpu(sz);
+    auto result = allocate_gpu(sz, host);
     auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
     if(status != hipSuccess)
         MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
     return result;
 }

-argument allocate_gpu(shape s)
+argument allocate_gpu(const shape& s, bool host)
 {
-    auto p = share(allocate_gpu(s.bytes() + 1));
+    auto p = share(allocate_gpu(s.bytes() + 1, host));
     return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }

-argument to_gpu(argument arg)
+argument to_gpu(argument arg, bool host)
 {
-    auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes()));
+    auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes(), host));
     return {arg.get_shape(), [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }
...
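allocate_gpu now checks free device memory up front and, when a device allocation fails anyway, retries once with pinned host memory (hipHostMalloc), which the GPU can still address over the bus; the host branch throws, so the retry cannot recurse. (The retry as committed discarded its result; the `return` has been added above.) From the caller's point of view, a sketch with an illustrative oversized buffer:

// An allocation larger than device memory can now land in pinned host
// memory instead of throwing immediately (size is illustrative only).
migraph::shape big{migraph::shape::int8_type, {1ull << 34}}; // 16 GiB
auto host_backed = migraph::gpu::allocate_gpu(big, true);    // explicit pinned-host buffer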
#ifndef MIGRAPH_GUARD_RTGLIB_ELIMINATE_WORKSPACE_HPP
#define MIGRAPH_GUARD_RTGLIB_ELIMINATE_WORKSPACE_HPP

#include <string>
#include <migraph/instruction_ref.hpp>

namespace migraph {
struct program;

namespace gpu {

struct eliminate_workspace
{
    std::string name() const { return "eliminate_workspace"; }
    void apply(program& p) const;
};

} // namespace gpu
} // namespace migraph

#endif
@@ -2,25 +2,27 @@
 #define MIGRAPH_GUARD_MIGRAPHLIB_HIP_HPP

 #include <migraph/operators.hpp>
+#include <utility>

 namespace migraph {
 namespace gpu {

-migraph::argument allocate_gpu(migraph::shape s);
+migraph::argument allocate_gpu(const migraph::shape& s, bool host = false);

-migraph::argument to_gpu(migraph::argument arg);
+migraph::argument to_gpu(migraph::argument arg, bool host = false);

 migraph::argument from_gpu(migraph::argument arg);

 struct hip_allocate
 {
+    std::string tag{};
     std::string name() const { return "hip::allocate"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs}.has(1);
         return inputs.front();
     }
-    argument compute(context&, shape output_shape, std::vector<argument>) const
+    argument compute(context&, const shape& output_shape, const std::vector<argument>&) const
     {
         return allocate_gpu(output_shape);
     }
@@ -29,12 +31,12 @@ struct hip_allocate

 struct hip_write
 {
     std::string name() const { return "hip::write"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs}.has(1);
         return inputs.front();
     }
-    argument compute(context&, shape, std::vector<argument> args) const
+    argument compute(context&, const shape&, const std::vector<argument>& args) const
     {
         return to_gpu(args.front());
     }
...
@@ -12,6 +12,7 @@
 #include <migraph/iterator_for.hpp>
 #include <migraph/gpu/rocblas.hpp>
 #include <migraph/gpu/context.hpp>
+#include <utility>

 namespace migraph {
 namespace gpu {
@@ -22,14 +23,15 @@ struct miopen_batch_norm_inference

     std::string name() const { return "gpu::batch_norm_inference"; }

-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(6);
         return op.compute_shape(
             {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
     }

-    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
     {
         auto x_desc = make_tensor(args[0].get_shape());
         auto y_desc = make_tensor(output_shape);
@@ -63,12 +65,13 @@ struct miopen_convolution
     miopenConvFwdAlgorithm_t algo{};

     std::string name() const { return "gpu::convolution"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(4).standard();
         return op.compute_shape({inputs.at(0), inputs.at(1)});
     }
-    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
     {
         auto x_desc = make_tensor(args[0].get_shape());
         auto w_desc = make_tensor(args[1].get_shape());
@@ -91,7 +94,7 @@ struct miopen_convolution
         return args[3];
     }

-    shape compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
+    shape compile(context& ctx, const shape& output_shape, std::vector<instruction_ref> inputs)
     {
         shape workspace_shape{};
         auto x_desc = make_tensor(inputs[0]->get_shape());
@@ -100,7 +103,7 @@ struct miopen_convolution
         std::size_t workspace_size = 0;
         miopenConvolutionForwardGetWorkSpaceSize(
-            ctx.handle.get(), x_desc.get(), w_desc.get(), cd.get(), y_desc.get(), &workspace_size);
+            ctx.handle.get(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
         workspace_shape = shape{shape::int8_type, {workspace_size}};

         auto x         = to_gpu(generate_argument(inputs[0]->get_shape()));
@@ -108,7 +111,7 @@ struct miopen_convolution
         auto y         = to_gpu(generate_argument(output_shape));
         auto workspace = allocate_gpu(workspace_shape);

-        int algo_count;
+        int algo_count = 1;
         miopenConvAlgoPerf_t perf;
         miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
                                               x_desc.get(),
@@ -125,7 +128,8 @@ struct miopen_convolution
                                               workspace_size,
                                               false);
         algo = perf.fwd_algo;
-        return workspace_shape;
+        return algo == miopenConvolutionFwdAlgoWinograd ? shape{shape::int8_type, {0}}
+                                                        : workspace_shape;
     }
 };
@@ -135,12 +139,13 @@ struct miopen_pooling
     shared<pooling_descriptor> pd;

     std::string name() const { return "gpu::pooling"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(2).standard();
         return op.compute_shape({inputs.at(0)});
     }
-    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
     {
         auto x_desc = make_tensor(args[0].get_shape());
         auto y_desc = make_tensor(output_shape);
@@ -166,13 +171,14 @@ struct miopen_pooling
 struct miopen_add
 {
     std::string name() const { return "gpu::add"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(3).not_broadcasted();
         return inputs.at(0);
     }

-    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
     {
         if(args[1].get_shape().broadcasted())
         {
@@ -213,12 +219,13 @@ struct miopen_gemm
 {
     gemm op;
     std::string name() const { return "gpu::convolution"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(3);
         return op.compute_shape({inputs.at(0), inputs.at(1)});
     }

-    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
     {
         float alpha = 1.0f;
         float beta  = 0.0f;
@@ -252,14 +259,14 @@ struct miopen_contiguous
 {
     contiguous op;
     std::string name() const { return "gpu::contiguous"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(2);
         return op.compute_shape({inputs.at(0)});
     }
-    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    argument compute(context&, shape output_shape, const std::vector<argument>& args) const
     {
-        hip_contiguous(output_shape, args.at(0), args.at(1));
+        hip_contiguous(std::move(output_shape), args.at(0), args.at(1));
         return args.at(1);
     }
 };
@@ -268,13 +275,14 @@ struct miopen_relu
 {
     shared<activation_descriptor> ad;
     std::string name() const { return "gpu::relu"; }
-    shape compute_shape(std::vector<shape> inputs) const
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(2).not_broadcasted();
         return inputs.at(1);
     }

-    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
     {
         float alpha = 1, beta = 0;
         auto x_desc = make_tensor(args[0].get_shape());
@@ -297,42 +305,50 @@ struct miopen_apply
     program* prog = nullptr;
     context ctx{};

+    void check_shape(shape x, instruction_ref i)
+    {
+        assert(x == i->get_shape());
+        (void)x;
+        (void)i;
+    }
+
     void apply()
     {
         for(auto it = prog->begin(); it != prog->end(); it++)
         {
+            auto s = it->get_shape();
             if(it->op.name() == "convolution")
             {
-                apply_convolution(it);
+                check_shape(s, apply_convolution(it));
             }
             else if(it->op.name() == "activation")
             {
-                apply_activation(it);
+                check_shape(s, apply_activation(it));
             }
             else if(it->op.name() == "pooling")
             {
-                apply_pooling(it);
+                check_shape(s, apply_pooling(it));
             }
             else if(it->op.name() == "add")
             {
-                apply_add(it);
+                check_shape(s, apply_add(it));
             }
             else if(it->op.name() == "gemm")
             {
-                apply_gemm(it);
+                check_shape(s, apply_gemm(it));
             }
             else if(it->op.name() == "contiguous")
             {
-                apply_contiguous(it);
+                check_shape(s, apply_contiguous(it));
             }
             else if(it->op.name() == "batch_norm_inference")
             {
-                apply_batch_norm_inference(it);
+                check_shape(s, apply_batch_norm_inference(it));
             }
         }
     }

-    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
+    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
     {
         if(ins == --prog->end())
         {
@@ -341,70 +357,71 @@ struct miopen_apply
         else
         {
             auto is     = prog->add_outline(s);
-            auto result = prog->insert_instruction(ins, hip_allocate{}, is);
+            auto result = prog->insert_instruction(ins, hip_allocate{std::move(tag)}, is);
             return result;
         }
     }

-    void apply_convolution(instruction_ref ins)
+    instruction_ref apply_convolution(instruction_ref ins)
     {
         auto&& op = any_cast<convolution>(ins->op);

         auto conv      = miopen_convolution{op, make_conv(op)};
         auto ws        = conv.compile(ctx, ins->result, ins->arguments);
-        auto workspace = insert_allocation(ins, ws);
+        auto workspace = insert_allocation(ins, ws, "workspace");
         auto output    = insert_allocation(ins, ins->result);

-        prog->replace_instruction(
+        return prog->replace_instruction(
             ins, conv, ins->arguments.at(0), ins->arguments.at(1), workspace, output);
     }

-    void apply_pooling(instruction_ref ins)
+    instruction_ref apply_pooling(instruction_ref ins)
     {
         auto&& op   = any_cast<pooling>(ins->op);
         auto pd     = make_pooling(op);
         auto output = insert_allocation(ins, ins->result);

-        prog->replace_instruction(
+        return prog->replace_instruction(
             ins, miopen_pooling{op, std::move(pd)}, ins->arguments.at(0), output);
     }

-    void apply_activation(instruction_ref ins)
+    instruction_ref apply_activation(instruction_ref ins)
     {
         auto&& op = any_cast<activation>(ins->op);
         auto ad   = make_relu();
         if(op.mode == "relu")
         {
             auto output = insert_allocation(ins, ins->result);
-            prog->replace_instruction(
+            return prog->replace_instruction(
                 ins, miopen_relu{std::move(ad)}, ins->arguments.at(0), output);
         }
+        return ins;
     }

-    void apply_add(instruction_ref ins)
+    instruction_ref apply_add(instruction_ref ins)
     {
         auto output = insert_allocation(ins, ins->result);
-        prog->replace_instruction(
+        return prog->replace_instruction(
             ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
     }

-    void apply_gemm(instruction_ref ins)
+    instruction_ref apply_gemm(instruction_ref ins)
     {
         auto&& op   = any_cast<gemm>(ins->op);
         auto output = insert_allocation(ins, ins->result);
-        prog->replace_instruction(
+        return prog->replace_instruction(
             ins, miopen_gemm{op}, ins->arguments.at(0), ins->arguments.at(1), output);
     }

-    void apply_contiguous(instruction_ref ins)
+    instruction_ref apply_contiguous(instruction_ref ins)
     {
         auto&& op   = any_cast<contiguous>(ins->op);
         auto output = insert_allocation(ins, ins->result);
-        prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
+        return prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
     }

-    void apply_batch_norm_inference(instruction_ref ins)
+    instruction_ref apply_batch_norm_inference(instruction_ref ins)
     {
         auto&& op   = any_cast<batch_norm_inference>(ins->op);
         auto output = insert_allocation(ins, ins->result);
@@ -416,7 +433,7 @@ struct miopen_apply
                        ins->arguments.end(),
                        std::back_inserter(reshapes),
                        [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
-        prog->replace_instruction(ins,
-                                  miopen_batch_norm_inference{op},
-                                  ins->arguments.at(0),
-                                  reshapes[0],
+        return prog->replace_instruction(ins,
+                                         miopen_batch_norm_inference{op},
+                                         ins->arguments.at(0),
+                                         reshapes[0],
...
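The refactoring in apply() is a debug aid: each apply_* now returns the instruction that replaced the original, and check_shape asserts that lowering preserved the result shape, which would catch mistakes like the transposed GetWorkSpaceSize descriptor arguments fixed above. The invariant in isolation, as a fragment assuming the API shown in this file:

// Replacing an instruction must not change the shape its consumers see.
auto s   = it->get_shape();       // shape before lowering
auto rep = apply_convolution(it); // returns the replacement instruction
assert(s == rep->get_shape());    // lowering must be shape-preserving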
@@ -2,6 +2,7 @@
 #include <migraph/gpu/lowering.hpp>
 #include <migraph/gpu/write_literals.hpp>
 #include <migraph/gpu/context.hpp>
+#include <migraph/gpu/eliminate_workspace.hpp>
 #include <migraph/check_context.hpp>
 #include <migraph/auto_contiguous.hpp>
 #include <migraph/dead_code_elimination.hpp>
@@ -22,9 +23,9 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
         //simplify_reshapes{},
         //dead_code_elimination{},
         lowering{ctx},
-        //eliminate_contiguous{},
-        //dead_code_elimination{},
-        //write_literals{},
+        eliminate_workspace{},
+        eliminate_contiguous{},
+        dead_code_elimination{},
         write_literals{},
         //check_context<context>{},
         dead_code_elimination{}
...
@@ -605,9 +605,7 @@ void transpose_test()
         result.visit([&](auto output) {
             std::vector<size_t> new_lens = {1, 3, 2, 2};
-            std::vector<size_t> new_strides = {12, 1, 6, 3};
             EXPECT(bool{output.get_shape().lens() == new_lens});
-            EXPECT(bool{output.get_shape().strides() == new_strides});
         });
     }
     {
...
...@@ -104,6 +104,7 @@ void verify_program() ...@@ -104,6 +104,7 @@ void verify_program()
visit_all(cpu_arg_f.get(), gpu_arg)([](auto cpu, auto gpu) { visit_all(cpu_arg_f.get(), gpu_arg)([](auto cpu, auto gpu) {
if(not migraph::verify_range(cpu, gpu)) if(not migraph::verify_range(cpu, gpu))
{ {
// TODO: Check for nans
std::cout << "FAILED: " << migraph::get_type_name<V>() << std::endl; std::cout << "FAILED: " << migraph::get_type_name<V>() << std::endl;
} }
}); });
@@ -272,6 +273,29 @@ struct test_transpose
     }
 };
+struct test_batchnorm_inference_2
+{
+    const size_t width = 14;
+    const size_t height = 14;
+    const size_t channels = 256;
+    const size_t batches = 1;
+    migraph::program create_program() const
+    {
+        migraph::program p;
+        migraph::shape s{migraph::shape::float_type, {batches, channels, height, width}};
+        migraph::shape vars{migraph::shape::float_type, {channels}};
+        auto x = p.add_parameter("x", s);
+        auto mean = p.add_parameter("mean", vars);
+        auto variance = p.add_parameter("variance", vars);
+        auto scale = p.add_parameter("scale", vars);
+        auto bias = p.add_parameter("bias", vars);
+        p.add_instruction(migraph::batch_norm_inference{}, x, mean, variance, scale, bias);
+        return p;
+    }
+};
 struct test_batchnorm_inference
 {
     const size_t width = 3;
@@ -309,4 +333,5 @@ int main()
     verify_program<test_contiguous>();
     verify_program<test_transpose>();
     verify_program<test_batchnorm_inference>();
+    verify_program<test_batchnorm_inference_2>();
 }
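test_batchnorm_inference_2 exercises a larger, resnet-scale activation (1x256x14x14) against per-channel statistics, rather than the tiny shapes of the existing test. For reference, batch norm in inference mode applies, per channel c (epsilon is a small constant whose default is not visible in this diff):

    y_{n,c,h,w} = scale_c * (x_{n,c,h,w} - mean_c) / sqrt(variance_c + epsilon) + bias_c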
@@ -6,7 +6,7 @@ struct sum_op
 {
     std::string name() const { return "sum"; }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument> args) const
+    compute(migraph::context&, const migraph::shape&, std::vector<migraph::argument> args) const
     {
         migraph::argument result;
         if(args.size() != 2)
@@ -36,7 +36,7 @@ struct minus_op
 {
     std::string name() const { return "minus"; }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument> args) const
+    compute(migraph::context&, const migraph::shape&, std::vector<migraph::argument> args) const
     {
         migraph::argument result;
         if(args.size() != 2)
@@ -66,7 +66,7 @@ struct pass_op
 {
     std::string name() const { return "pass"; }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument> args) const
+    compute(migraph::context&, const migraph::shape&, std::vector<migraph::argument> args) const
     {
         if(args.empty())
             return {};
@@ -85,7 +85,7 @@ struct pass_standard_op
 {
     std::string name() const { return "pass"; }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument> args) const
+    compute(migraph::context&, const migraph::shape&, std::vector<migraph::argument> args) const
     {
         if(args.empty())
             return {};
@@ -109,12 +109,12 @@ struct nop
 {
     std::string name() const { return "nop"; }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument>) const
+    compute(migraph::context&, const migraph::shape&, const std::vector<migraph::argument>&) const
     {
         return {};
     }
-    migraph::shape compute_shape(std::vector<migraph::shape>) const { return {}; }
+    migraph::shape compute_shape(const std::vector<migraph::shape>&) const { return {}; }
 };
 
 inline migraph::literal get_2x2()
...
@@ -141,7 +141,7 @@ bool throws(F f)
 }
 
 template <class F, class Exception>
-bool throws(F f, std::string msg = "")
+bool throws(F f, const std::string& msg = "")
 {
     try
     {
...
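The signature changes in this file all follow one pattern: parameters that are only read (the shape, and the argument or shape vectors where no copy is needed) are now taken by const reference instead of by value, avoiding a copy on every call. A minimal illustration of the difference (not migraph code):

    #include <vector>

    // Pass by value: the caller's vector is copied on every call.
    int sum_by_value(std::vector<int> v)
    {
        int s = 0;
        for(int x : v)
            s += x;
        return s;
    }

    // Pass by const reference: read-only access, no copy.
    int sum_by_cref(const std::vector<int>& v)
    {
        int s = 0;
        for(int x : v)
            s += x;
        return s;
    }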
#include <migraph/program.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <algorithm>
#include <iostream>
#include <sstream>
#include <type_traits>
#include "test.hpp"
template <class... Ts>
void expect_shape(const migraph::shape& expected, const migraph::operation& op, Ts... xs)
{
migraph::program p;
std::vector<migraph::shape> shapes{xs...};
std::vector<migraph::instruction_ref> args(shapes.size());
std::transform(
shapes.begin(), shapes.end(), args.begin(), [&](auto&& s) { return p.add_outline(s); });
p.add_instruction(op, args);
if(p.get_shape() != expected)
{
std::cout << "FAILED: Incorrect shape for " << op.name() << ": ";
std::cout << expected << " != " << p.get_shape() << std::endl;
for(auto&& s : shapes)
std::cout << " " << s << std::endl;
}
}
template <class... Ts>
void throws_shape(const migraph::operation& op, Ts... xs)
{
migraph::program p;
std::vector<migraph::shape> shapes{xs...};
std::vector<migraph::instruction_ref> args(shapes.size());
std::transform(
shapes.begin(), shapes.end(), args.begin(), [&](auto&& s) { return p.add_outline(s); });
bool thrown = test::throws([&] { p.add_instruction(op, args); });
if(not thrown)
{
std::cout << "FAILED: No error found for " << op.name() << ": ";
for(auto&& s : shapes)
std::cout << " " << s << std::endl;
}
}
template <class...>
struct always_false : std::false_type
{
};
template <class... Ts>
void throws_shape(const migraph::shape&, Ts...)
{
static_assert(always_false<Ts...>{},
"An expected shape should not be passed to throws_shape function");
}
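The catch-all throws_shape overload above guards against passing an expected shape by mistake. The assert must depend on the template parameters: static_assert(false, ...) would fire even when the overload is never called, while a pack-dependent always_false<Ts...> only fires if this overload is actually instantiated. A reduced sketch of the idiom:

    #include <type_traits>

    template <class...>
    struct always_false : std::false_type
    {
    };

    template <class... Ts>
    void reject(Ts...)
    {
        // Fires only when this overload is chosen by overload resolution.
        static_assert(always_false<Ts...>{}, "this overload must not be used");
    }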
void batch_norm_inference_shape()
{
const size_t channels = 3;
migraph::shape s{migraph::shape::float_type, {4, channels, 3, 3}};
migraph::shape vars{migraph::shape::float_type, {channels}};
expect_shape(s, migraph::batch_norm_inference{}, s, vars, vars, vars, vars);
throws_shape(migraph::batch_norm_inference{}, s);
throws_shape(migraph::batch_norm_inference{}, s, vars, vars, vars, vars, vars);
}
void convolution_shape()
{
migraph::shape output{migraph::shape::float_type, {4, 4, 1, 1}};
migraph::shape input{migraph::shape::float_type, {4, 3, 3, 3}};
migraph::shape weights{migraph::shape::float_type, {4, 3, 3, 3}};
expect_shape(output, migraph::convolution{}, input, weights);
throws_shape(migraph::convolution{}, input);
migraph::shape input2{migraph::shape::float_type, {3, 3}};
migraph::shape weights2{migraph::shape::float_type, {3, 3}};
throws_shape(migraph::convolution{}, input2, weights2);
throws_shape(migraph::convolution{}, input2, weights);
}
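The expected shapes follow the standard convolution size formula per spatial dimension,

    out = floor((in + 2 * pad - kernel) / stride) + 1

so with in = 3, pad = 0, kernel = 3, stride = 1 each spatial dimension collapses to 1; the batch (4) comes from the input and the output channels (4) from the first dimension of the weights, giving {4, 4, 1, 1}. The 2-D input2/weights2 cases throw because the operator expects 4-dimensional tensors.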
void transpose_shape()
{
migraph::shape input{migraph::shape::float_type, {2, 2}};
migraph::shape output{migraph::shape::float_type, {2, 2}, {1, 2}};
expect_shape(input, migraph::transpose{{0, 1}}, input);
expect_shape(output, migraph::transpose{{1, 0}}, input);
throws_shape(migraph::transpose{{1, 2}}, input);
}
void contiguous_shape()
{
migraph::shape output{migraph::shape::float_type, {2, 2}};
migraph::shape input{migraph::shape::float_type, {2, 2}, {1, 2}};
expect_shape(output, migraph::contiguous{}, input);
throws_shape(migraph::contiguous{}, input, input);
migraph::shape single{migraph::shape::float_type, {2}};
throws_shape(migraph::contiguous{}, single);
}
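contiguous takes a non-packed (here, transposed) shape and produces the same lengths with standard row-major strides, which is why {2, 2} with strides {1, 2} becomes {2, 2} with strides {2, 1} above; the 1-D {2} case is rejected too, suggesting the operator requires a non-standard input. A sketch of the stride recomputation (assumed semantics):

    #include <cstddef>
    #include <vector>

    // Row-major ("standard") strides: the last dimension moves fastest.
    // For lens {2, 2} this returns {2, 1}.
    std::vector<std::size_t> standard_strides(const std::vector<std::size_t>& lens)
    {
        std::vector<std::size_t> strides(lens.size(), 1);
        for(std::size_t i = lens.size(); i > 1; i--)
            strides[i - 2] = strides[i - 1] * lens[i - 1];
        return strides;
    }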
void reshape_shape()
{
migraph::shape input{migraph::shape::float_type, {24, 1, 1, 1}};
for(auto&& new_shape :
std::vector<std::vector<int64_t>>{{8, 3, 1, 1}, {1, 3, 4, 2}, {1, 3, 4, 2}})
{
std::vector<std::size_t> lens(new_shape.size());
std::copy(new_shape.begin(), new_shape.end(), lens.begin());
migraph::shape output{migraph::shape::float_type, lens};
expect_shape(output, migraph::reshape{new_shape}, input);
}
for(auto&& new_shape : std::vector<std::vector<int64_t>>{{8, 3, 2, 2}, {1, 3, -1, -1}})
{
throws_shape(migraph::reshape{new_shape}, input);
}
}
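reshape preserves the total element count, 24 here. The throwing cases are a count mismatch (8 * 3 * 2 * 2 = 96 != 24) and more than one -1 placeholder; a single -1 could be inferred from the remaining dimensions. A sketch of the validity check (assumed semantics):

    #include <cstdint>
    #include <vector>

    // At most one -1 may be inferred, and the known dimensions must
    // divide the total element count evenly.
    bool valid_reshape(std::int64_t elements, const std::vector<std::int64_t>& new_dims)
    {
        std::int64_t known = 1;
        int wildcards = 0;
        for(auto d : new_dims)
        {
            if(d == -1)
                wildcards++;
            else
                known *= d;
        }
        if(wildcards > 1)
            return false; // {1, 3, -1, -1} is rejected above
        if(wildcards == 1)
            return known != 0 and elements % known == 0;
        return elements == known; // {8, 3, 2, 2} = 96 != 24, also rejected
    }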
void flatten_shape()
{
migraph::shape input{migraph::shape::float_type, {2, 4, 6, 8}};
expect_shape(
migraph::shape{migraph::shape::float_type, {1, 2 * 4 * 6 * 8}}, migraph::flatten{0}, input);
expect_shape(
migraph::shape{migraph::shape::float_type, {2, 4 * 6 * 8}}, migraph::flatten{1}, input);
expect_shape(
migraph::shape{migraph::shape::float_type, {2 * 4, 6 * 8}}, migraph::flatten{2}, input);
expect_shape(
migraph::shape{migraph::shape::float_type, {2 * 4 * 6, 8}}, migraph::flatten{3}, input);
expect_shape(
migraph::shape{migraph::shape::float_type, {2 * 4 * 6 * 8, 1}}, migraph::flatten{4}, input);
throws_shape(migraph::flatten{5}, input);
}
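flatten{axis} collapses every dimension before axis into the first output dimension and the rest into the second, so {2, 4, 6, 8} yields the five shapes checked above, from {1, 384} at axis 0 to {384, 1} at axis 4; axis 5 is out of range for a 4-D input. A sketch of the length computation:

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <vector>

    // flatten{axis}: {d0, ..., dn-1} -> {d0*...*d(axis-1), d(axis)*...*d(n-1)}.
    // For {2, 4, 6, 8} and axis 2 this gives {8, 48}, as checked above.
    std::vector<std::size_t> flatten_dims(const std::vector<std::size_t>& lens, std::size_t axis)
    {
        auto mid = lens.begin() + axis;
        std::size_t a = std::accumulate(lens.begin(), mid, std::size_t{1}, std::multiplies<>{});
        std::size_t b = std::accumulate(mid, lens.end(), std::size_t{1}, std::multiplies<>{});
        return {a, b};
    }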
int main()
{
batch_norm_inference_shape();
convolution_shape();
transpose_shape();
contiguous_shape();
reshape_shape();
flatten_shape();
}
@@ -8,12 +8,12 @@ struct simple_operation
 {
     int data = 1;
     std::string name() const { return "simple"; }
-    migraph::shape compute_shape(std::vector<migraph::shape>) const
+    migraph::shape compute_shape(const std::vector<migraph::shape>&) const
     {
         MIGRAPH_THROW("not computable");
     }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument>) const
+    compute(migraph::context&, const migraph::shape&, const std::vector<migraph::argument>&) const
     {
         MIGRAPH_THROW("not computable");
     }
@@ -27,12 +27,12 @@ struct simple_operation
 struct simple_operation_no_print
 {
     std::string name() const { return "simple"; }
-    migraph::shape compute_shape(std::vector<migraph::shape>) const
+    migraph::shape compute_shape(const std::vector<migraph::shape>&) const
     {
         MIGRAPH_THROW("not computable");
     }
     migraph::argument
-    compute(migraph::context&, migraph::shape, std::vector<migraph::argument>) const
+    compute(migraph::context&, const migraph::shape&, const std::vector<migraph::argument>&) const
     {
         MIGRAPH_THROW("not computable");
     }
...