".github/vscode:/vscode.git/clone" did not exist on "7f875f1293aa4dab646e312b1e67edda372102c7"
Commit fb75dfaf authored by Paul

Only use no-cache on jenkins

parents e596eec2 f0604d78
@@ -5,7 +5,23 @@ add_library(onnx-proto STATIC ${PROTO_SRCS})
target_include_directories(onnx-proto SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${PROTOBUF_INCLUDE_DIR})
target_compile_options(onnx-proto PRIVATE -w)
target_link_libraries(onnx-proto PRIVATE ${PROTOBUF_LIBRARY})
set_target_properties(onnx-proto PROPERTIES POSITION_INDEPENDENT_CODE On)
add_library(migraph_onnx onnx.cpp)
rocm_clang_tidy_check(migraph_onnx)
target_link_libraries(migraph_onnx PRIVATE onnx-proto)
target_link_libraries(migraph_onnx PUBLIC migraph)
add_executable(read_onnx read_onnx.cpp)
rocm_clang_tidy_check(read_onnx)
target_link_libraries(read_onnx onnx-proto rtg)
target_link_libraries(read_onnx migraph_onnx)
add_executable(mnist mnist.cpp)
rocm_clang_tidy_check(mnist)
target_link_libraries(mnist migraph_cpu migraph_onnx)
if(MIGRAPH_ENABLE_GPU)
add_executable(verify_onnx verify_onnx.cpp)
rocm_clang_tidy_check(verify_onnx)
target_link_libraries(verify_onnx migraph_onnx migraph_cpu migraph_gpu)
endif()
#include <cstdio>
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <numeric>
#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <migraph/onnx.hpp>
#include <migraph/cpu/cpu_target.hpp>
#include <migraph/generate.hpp>
auto reverse_int(unsigned int i)
{
unsigned char c1, c2, c3, c4;
c1 = i & 255u;
c2 = (i >> 8u) & 255u;
c3 = (i >> 16u) & 255u;
c4 = (i >> 24u) & 255u;
return (static_cast<unsigned int>(c1) << 24u) + (static_cast<unsigned int>(c2) << 16u) +
(static_cast<unsigned int>(c3) << 8u) + c4;
}
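// The MNIST IDX files store their 32-bit header fields (magic number, image
// count, rows, columns) in big-endian order, so on a little-endian host each
// field read below has to be byte-swapped with reverse_int before it can be
// compared against the expected magic numbers 2051 (images) and 2049 (labels).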
std::vector<float> read_mnist_images(std::string full_path, int& number_of_images, int& image_size)
{
using uchar = unsigned char;
std::ifstream file(full_path, std::ios::binary);
if(file.is_open())
{
int magic_number = 0, n_rows = 0, n_cols = 0;
file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
magic_number = reverse_int(magic_number);
if(magic_number != 2051)
throw std::runtime_error("Invalid MNIST image file!");
file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
number_of_images = reverse_int(number_of_images);
file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
n_rows = reverse_int(n_rows);
file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
n_cols = reverse_int(n_cols);
image_size = n_rows * n_cols;
std::vector<float> result(number_of_images * image_size);
for(int i = 0; i < number_of_images; i++)
{
for(int j = 0; j < image_size; j++)
{
uchar tmp;
file.read(reinterpret_cast<char*>(&tmp), 1);
result[i * image_size + j] = tmp / 255.0;
}
}
return result;
}
else
{
throw std::runtime_error("Cannot open file `" + full_path + "`!");
}
}
std::vector<int32_t> read_mnist_labels(std::string full_path, int& number_of_labels)
{
using uchar = unsigned char;
std::ifstream file(full_path, std::ios::binary);
if(file.is_open())
{
int magic_number = 0;
file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
magic_number = reverse_int(magic_number);
if(magic_number != 2049)
throw std::runtime_error("Invalid MNIST label file!");
file.read(reinterpret_cast<char*>(&number_of_labels), sizeof(number_of_labels));
number_of_labels = reverse_int(number_of_labels);
std::vector<int32_t> result(number_of_labels);
for(int i = 0; i < number_of_labels; i++)
{
uchar tmp;
file.read(reinterpret_cast<char*>(&tmp), 1);
result[i] = tmp;
}
return result;
}
else
{
throw std::runtime_error("Unable to open file `" + full_path + "`!");
}
}
std::vector<float> softmax(std::vector<float> p)
{
size_t n = p.size();
std::vector<float> result(n);
std::transform(p.begin(), p.end(), result.begin(), [](auto x) { return std::exp(x); });
float s = std::accumulate(result.begin(), result.end(), 0.0f, std::plus<float>());
std::transform(result.begin(), result.end(), result.begin(), [=](auto x) { return x / s; });
return result;
}
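// Note: this softmax is a minimal demo version and does not subtract the
// largest logit before exponentiating. For the small MNIST logits produced
// here that is fine; a numerically safer variant would first shift every
// entry by *std::max_element(p.begin(), p.end()) so std::exp cannot overflow.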
int main(int argc, char const* argv[])
{
if(argc > 3)
{
std::string datafile = argv[2];
std::string labelfile = argv[3];
int nimages = -1;
int image_size = -1;
int nlabels = -1;
std::vector<float> input = read_mnist_images(datafile, nimages, image_size);
std::vector<int32_t> labels = read_mnist_labels(labelfile, nlabels);
std::string file = argv[1];
auto prog = migraph::parse_onnx(file);
prog.compile(migraph::cpu::cpu_target{});
auto s = migraph::shape{migraph::shape::float_type, {1, 1, 28, 28}};
std::cout << s << std::endl;
auto ptr = input.data();
for(int i = 0; i < 20; i++)
{
std::cout << "label: " << labels[i] << " ----> ";
auto input3 = migraph::argument{s, &ptr[784 * i]};
auto result = prog.eval({{"Input3", input3}});
std::vector<float> logits;
result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
std::vector<float> probs = softmax(logits);
for(auto x : probs)
std::cout << x << " ";
std::cout << std::endl;
}
std::cout << std::endl;
}
}
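// Expected invocation, based on the argv indices used above (paths are
// placeholders): mnist <model.onnx> <mnist-image-file> <mnist-label-file>.
// The program prints each of the first 20 labels followed by the softmax
// probabilities the model assigns to the ten digit classes.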
#include <google/protobuf/text_format.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <onnx.pb.h>
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <functional>
#include <array>
#include <vector>
#include <migraph/fallthrough.hpp>
#include <migraph/program.hpp>
#include <migraph/operators.hpp>
#include <migraph/ranges.hpp>
#include <migraph/instruction.hpp>
namespace migraph {
struct unknown
{
std::string op;
std::string name() const { return "unknown:" + op; }
shape compute_shape(std::vector<shape> input) const
{
if(input.empty())
return {};
else
return input.front();
}
argument compute(context&, shape, std::vector<argument>) const
{
MIGRAPH_THROW("not computable");
}
friend std::ostream& operator<<(std::ostream& os, const unknown& x)
{
os << x.name();
return os;
}
};
struct onnx_parser
{
using attribute_map = std::unordered_map<std::string, onnx::AttributeProto>;
using node_map = std::unordered_map<std::string, onnx::NodeProto>;
using op_func = std::function<instruction_ref(attribute_map, std::vector<instruction_ref>)>;
node_map nodes;
std::unordered_map<std::string, instruction_ref> instructions;
program prog = program();
std::unordered_map<std::string, op_func> ops;
onnx_parser()
{
add_generic_op("Add", add{});
add_generic_op("Div", div{});
add_generic_op("MatMul", gemm{});
add_generic_op("Mul", mul{});
add_generic_op("Relu", activation{"relu"});
add_generic_op("Sub", sub{});
add_mem_op("Constant", &onnx_parser::parse_constant);
add_mem_op("Conv", &onnx_parser::parse_conv);
add_mem_op("MaxPool", &onnx_parser::parse_pooling);
add_mem_op("Reshape", &onnx_parser::parse_reshape);
add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
}
template <class F>
void add_op(std::string name, F f)
{
ops.emplace(name, f);
}
template <class F>
void add_mem_op(std::string name, F f)
{
ops.emplace(name, [=](auto&&... xs) {
return std::mem_fn(f)(*this, name, std::forward<decltype(xs)>(xs)...);
});
}
template <class T>
void add_generic_op(std::string name, T x)
{
ops.emplace(name, [this, x](attribute_map attributes, std::vector<instruction_ref> args) {
if(args.size() == 2 and contains(attributes, "broadcast"))
{
uint64_t broadcasted = parse_value(attributes.at("broadcast")).at<uint64_t>();
if(broadcasted != 0)
{
uint64_t axis = (contains(attributes, "axis"))
? parse_value(attributes.at("axis")).at<uint64_t>()
: 0;
auto l = prog.add_instruction(broadcast{axis}, args);
return prog.add_instruction(x, args[0], l);
}
}
return prog.add_instruction(x, args);
});
}
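// Example of what add_generic_op handles: older ONNX opsets let a binary node
// such as Add carry broadcast=1 and an optional axis attribute, e.g. adding a
// per-channel bias B[64] to X[1,64,56,56] with axis=1 (shapes illustrative).
// In that case the second argument is first expanded with a broadcast
// instruction along the given axis before the elementwise op is applied;
// without the attribute the op is added over the arguments unchanged.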
instruction_ref
parse_conv(std::string, attribute_map attributes, std::vector<instruction_ref> args)
{
convolution op;
if(contains(attributes, "pads"))
{
copy(attributes["pads"].ints(), op.padding.begin());
}
if(contains(attributes, "strides"))
{
copy(attributes["strides"].ints(), op.stride.begin());
}
if(contains(attributes, "dilations"))
{
copy(attributes["dilations"].ints(), op.dilation.begin());
}
if(args.size() == 3)
{
uint64_t axis = 1;
auto l1 = prog.add_instruction(op, args[0], args[1]);
auto l2 = prog.add_instruction(broadcast{axis}, l1, args[2]);
return prog.add_instruction(add{}, l1, l2);
}
return prog.add_instruction(op, args);
}
instruction_ref
parse_pooling(std::string, attribute_map attributes, std::vector<instruction_ref> args)
{
pooling op{"max"};
if(contains(attributes, "pads"))
{
copy(attributes["pads"].ints(), op.padding.begin());
}
if(contains(attributes, "strides"))
{
copy(attributes["strides"].ints(), op.stride.begin());
}
if(contains(attributes, "kernel_shape"))
{
copy(attributes["kernel_shape"].ints(), op.lengths.begin());
}
return prog.add_instruction(op, args);
}
instruction_ref
parse_reshape(std::string, attribute_map attributes, std::vector<instruction_ref> args)
{
reshape op;
if(args.size() == 1)
{
literal s = parse_value(attributes.at("shape"));
s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
}
if(args.size() == 2)
{
literal s = args[1]->lit;
s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
}
return prog.add_instruction(op, args[0]);
}
instruction_ref
parse_constant(std::string, attribute_map attributes, std::vector<instruction_ref>)
{
literal v = parse_value(attributes.at("value"));
return prog.add_literal(v);
}
instruction_ref
parse_batchnorm(std::string, attribute_map attributes, std::vector<instruction_ref> args)
{
float epsilon = 1e-5f;
float momentum = 0.9f;
batch_norm_inference::bn_infer_mode_t bn_mode = batch_norm_inference::spatial;
bool is_test = false;
if(contains(attributes, "epsilon"))
{
epsilon = parse_value(attributes.at("epsilon")).at<float>();
}
if(contains(attributes, "momentum"))
{
epsilon = parse_value(attributes.at("momentum")).at<float>();
}
if(contains(attributes, "is_test"))
{
is_test = parse_value(attributes.at("is_test")).at<uint64_t>() > 0;
}
if(contains(attributes, "spatial"))
{
bn_mode = (parse_value(attributes.at("spatial")).at<uint64_t>() > 0)
? batch_norm_inference::spatial
: batch_norm_inference::per_activation;
}
batch_norm_inference op{epsilon, momentum, bn_mode, is_test};
return prog.add_instruction(op, args);
}
void parse_from(std::istream& is)
{
onnx::ModelProto model;
if(model.ParseFromIstream(&is))
{
if(model.has_graph())
{
this->parse_graph(model.graph());
}
}
else
{
throw std::runtime_error("Failed reading");
}
}
void parse_graph(const onnx::GraphProto& graph)
{
nodes = get_nodes(graph);
for(auto&& input : graph.input())
{
const std::string& name = input.name();
// TODO: Get shape of input parameter
shape s = parse_type(input.type());
instructions[name] = prog.add_parameter(name, s);
}
for(auto&& p : nodes)
{
this->parse_node(get_name(p.second));
}
}
void parse_node(std::string name)
{
if(name.empty())
MIGRAPH_THROW("Onnx node must have a name");
if(instructions.count(name) == 0)
{
auto&& node = nodes.at(name);
std::vector<instruction_ref> args;
for(auto&& input : node.input())
{
if(nodes.count(input) > 0)
{
auto&& iname = get_name(nodes.at(input));
assert(name != iname);
this->parse_node(iname);
args.push_back(instructions.at(iname));
}
else
{
args.push_back(instructions.at(input));
}
}
if(ops.count(node.op_type()) == 0)
{
instructions[name] = prog.add_instruction(unknown{node.op_type()}, args);
}
else
{
instructions[name] = ops[node.op_type()](get_attributes(node), args);
}
}
}
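// parse_node resolves a node's inputs recursively: an input that names
// another node (or one of its outputs) is parsed first, depth-first, while
// anything else is expected to already be in the instructions map as a graph
// input parameter. Nodes are therefore appended to the program in topological
// order, and unsupported op types are kept as placeholder "unknown" ops.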
static attribute_map get_attributes(const onnx::NodeProto& node)
{
std::unordered_map<std::string, onnx::AttributeProto> result;
for(auto&& attr : node.attribute())
{
result[attr.name()] = attr;
}
return result;
}
static std::string get_name(const onnx::NodeProto& node)
{
if(node.name().empty())
{
std::string generated = "migraph_unnamed_node";
for(auto&& output : node.output())
{
generated += "_" + output;
}
return generated;
}
return node.name();
}
static node_map get_nodes(const onnx::GraphProto& graph)
{
std::unordered_map<std::string, onnx::NodeProto> result;
for(auto&& node : graph.node())
{
result[get_name(node)] = node;
for(auto&& output : node.output())
{
result[output] = node;
}
}
return result;
}
template <class T>
static literal from_repeated(shape::type_t t, const T& r)
{
std::size_t size = r.size();
return literal{{t, {size}}, r.begin(), r.end()};
}
static literal parse_value(const onnx::AttributeProto& attr)
{
switch(attr.type())
{
case onnx::AttributeProto::UNDEFINED: return {};
case onnx::AttributeProto::FLOAT: return literal{attr.f()};
case onnx::AttributeProto::INT: return literal{attr.i()};
case onnx::AttributeProto::STRING: return {};
case onnx::AttributeProto::TENSOR: return parse_tensor(attr.t());
case onnx::AttributeProto::GRAPH: return {};
case onnx::AttributeProto::FLOATS: return from_repeated(shape::float_type, attr.floats());
case onnx::AttributeProto::INTS: return from_repeated(shape::int64_type, attr.ints());
case onnx::AttributeProto::STRINGS: return {};
case onnx::AttributeProto::TENSORS: return {};
case onnx::AttributeProto::GRAPHS: return {};
}
MIGRAPH_THROW("Invalid attribute type");
}
static literal parse_tensor(const onnx::TensorProto& t)
{
std::vector<std::size_t> dims(t.dims().begin(), t.dims().end());
if(t.has_raw_data())
{
const std::string& s = t.raw_data();
switch(t.data_type())
{
case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
case onnx::TensorProto::FLOAT: return literal{{shape::float_type, dims}, s.data()};
case onnx::TensorProto::UINT8: throw std::runtime_error("");
case onnx::TensorProto::INT8: return literal{{shape::int32_type, dims}, s.data()};
case onnx::TensorProto::UINT16: return literal{{shape::int32_type, dims}, s.data()};
case onnx::TensorProto::INT16: return literal{{shape::int32_type, dims}, s.data()};
case onnx::TensorProto::INT32: return literal{{shape::int32_type, dims}, s.data()};
case onnx::TensorProto::INT64: return literal{{shape::int64_type, dims}, s.data()};
case onnx::TensorProto::STRING: throw std::runtime_error("");
case onnx::TensorProto::BOOL: return literal{{shape::int32_type, dims}, s.data()};
case onnx::TensorProto::FLOAT16: throw std::runtime_error("");
case onnx::TensorProto::DOUBLE: return literal{{shape::double_type, dims}, s.data()};
case onnx::TensorProto::UINT32: throw std::runtime_error("");
case onnx::TensorProto::UINT64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
}
MIGRAPH_THROW("Invalid tensor type");
}
switch(t.data_type())
{
case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
case onnx::TensorProto::FLOAT:
return literal{{shape::float_type, dims}, t.float_data().begin(), t.float_data().end()};
case onnx::TensorProto::UINT8: throw std::runtime_error("");
case onnx::TensorProto::INT8:
return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::UINT16:
return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::INT16:
return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::INT32:
return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::INT64:
return literal{{shape::int64_type, dims}, t.int64_data().begin(), t.int64_data().end()};
case onnx::TensorProto::STRING: throw std::runtime_error("");
case onnx::TensorProto::BOOL:
return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::FLOAT16: throw std::runtime_error("");
case onnx::TensorProto::DOUBLE:
return literal{
{shape::double_type, dims}, t.double_data().begin(), t.double_data().end()};
case onnx::TensorProto::UINT32: throw std::runtime_error("");
case onnx::TensorProto::UINT64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
}
MIGRAPH_THROW("Invalid tensor type");
}
static shape parse_type(const onnx::TypeProto& t)
{
shape::type_t shape_type{};
switch(t.tensor_type().elem_type())
{
case onnx::TensorProto::UNDEFINED:
break; // throw std::runtime_error("Unsupported type UNDEFINED");
case onnx::TensorProto::FLOAT: shape_type = shape::float_type; break;
case onnx::TensorProto::UINT8:
break; // throw std::runtime_error("Unsupported type UINT8");
case onnx::TensorProto::INT8: shape_type = shape::int8_type; break;
case onnx::TensorProto::UINT16: shape_type = shape::uint16_type; break;
case onnx::TensorProto::INT16: shape_type = shape::int16_type; break;
case onnx::TensorProto::INT32: shape_type = shape::int32_type; break;
case onnx::TensorProto::INT64: shape_type = shape::int64_type; break;
case onnx::TensorProto::STRING:
break; // throw std::runtime_error("Unsupported type STRING");
case onnx::TensorProto::BOOL:
break; // throw std::runtime_error("Unsupported type BOOL");
case onnx::TensorProto::FLOAT16:
break; // throw std::runtime_error("Unsupported type FLOAT16");
case onnx::TensorProto::DOUBLE: shape_type = shape::double_type; break;
case onnx::TensorProto::UINT32: shape_type = shape::uint32_type; break;
case onnx::TensorProto::UINT64: shape_type = shape::uint64_type; break;
case onnx::TensorProto::COMPLEX64:
break; // throw std::runtime_error("Unsupported type COMPLEX64");
case onnx::TensorProto::COMPLEX128:
break; // throw std::runtime_error("Unsupported type COMPLEX128");
}
std::vector<std::size_t> dims;
// TODO: Use std::transform
for(auto&& d : t.tensor_type().shape().dim())
{
dims.push_back(d.dim_value());
}
return {shape_type, dims};
}
};
program parse_onnx(const std::string& name)
{
std::fstream input(name.c_str(), std::ios::in | std::ios::binary);
onnx_parser parser;
#ifndef NDEBUG
// Log the program when it can't be parsed
try
{
parser.parse_from(input);
}
catch(...)
{
std::cerr << parser.prog << std::endl;
throw;
}
#else
parser.parse_from(input);
#endif
return std::move(parser.prog);
}
} // namespace migraph
#include <google/protobuf/text_format.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <onnx.pb.h>
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <functional>
#include <rtg/fallthrough.hpp>
#include <rtg/program.hpp>
#include <rtg/operators.hpp>
struct unknown
{
std::string op;
std::string name() const { return "unknown:" + op; }
rtg::shape compute_shape(std::vector<rtg::shape> input) const
{
if(input.empty())
return {};
else
return input.front();
}
rtg::argument compute(rtg::shape, std::vector<rtg::argument>) const
{
RTG_THROW("not computable");
}
friend std::ostream& operator<<(std::ostream& os, const unknown& x)
{
os << x.name();
return os;
}
};
template <class C, class T>
bool contains(C&& c, T&& x)
{
return c.find(x) != c.end();
}
template <class Range, class Iterator>
void copy(Range&& r, Iterator it)
{
std::copy(r.begin(), r.end(), it);
}
struct onnx_parser
{
using attribute_map = std::unordered_map<std::string, onnx::AttributeProto>;
using node_map = std::unordered_map<std::string, onnx::NodeProto>;
using op_func =
std::function<rtg::instruction_ref(attribute_map, std::vector<rtg::instruction_ref>)>;
node_map nodes;
std::unordered_map<std::string, rtg::instruction_ref> instructions;
rtg::program prog = rtg::program();
std::unordered_map<std::string, op_func> ops;
onnx_parser()
{
add_op("Conv", [this](attribute_map attributes, std::vector<rtg::instruction_ref> args) {
rtg::convolution op;
if(contains(attributes, "pads"))
{
copy(attributes["pads"].ints(), op.padding.begin());
}
if(contains(attributes, "strides"))
{
copy(attributes["strides"].ints(), op.stride.begin());
}
if(contains(attributes, "dilations"))
{
copy(attributes["dilations"].ints(), op.dilation.begin());
}
return prog.add_instruction(op, args);
});
add_op("MaxPool", [this](attribute_map attributes, std::vector<rtg::instruction_ref> args) {
rtg::pooling op{"max"};
// for(auto&& p:attributes) std::cout << p.first << std::endl;
if(contains(attributes, "pads"))
{
copy(attributes["pads"].ints(), op.padding.begin());
}
if(contains(attributes, "strides"))
{
copy(attributes["strides"].ints(), op.stride.begin());
}
if(contains(attributes, "kernel_shape"))
{
copy(attributes["kernel_shape"].ints(), op.lengths.begin());
}
return prog.add_instruction(op, args);
});
add_op("Relu", [this](attribute_map, std::vector<rtg::instruction_ref> args) {
return prog.add_instruction(rtg::activation{"relu"}, args);
});
add_op("Reshape", [this](attribute_map attributes, std::vector<rtg::instruction_ref> args) {
rtg::reshape op;
rtg::literal s = parse_value(attributes.at("shape"));
s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
return prog.add_instruction(op, args);
});
add_op("Constant", [this](attribute_map attributes, std::vector<rtg::instruction_ref>) {
rtg::literal v = parse_value(attributes.at("value"));
return prog.add_literal(v);
});
}
template <class F>
void add_op(std::string name, F f)
{
ops.emplace(name, f);
}
void parse_from(std::istream& is)
{
onnx::ModelProto model;
if(model.ParseFromIstream(&is))
{
if(model.has_graph())
{
this->parse_graph(model.graph());
}
}
else
{
throw std::runtime_error("Failed reading");
}
}
void parse_graph(const onnx::GraphProto& graph)
{
nodes = get_nodes(graph);
for(auto&& input : graph.input())
{
const std::string& name = input.name();
// TODO: Get shape of input parameter
rtg::shape s = parse_type(input.type());
instructions[name] = prog.add_parameter(name, s);
}
for(auto&& p : nodes)
{
this->parse_node(p.second.name());
}
}
void parse_node(std::string name)
{
if(instructions.count(name) == 0)
{
auto&& node = nodes.at(name);
std::vector<rtg::instruction_ref> args;
for(auto&& input : node.input())
{
if(nodes.count(input) > 0)
{
auto&& iname = nodes.at(input).name();
this->parse_node(iname);
args.push_back(instructions.at(iname));
}
else
{
args.push_back(instructions.at(input));
}
}
if(ops.count(node.op_type()) == 0)
{
instructions[name] = prog.add_instruction(unknown{node.op_type()}, args);
}
else
{
instructions[name] = ops[node.op_type()](get_attributes(node), args);
}
}
}
static attribute_map get_attributes(const onnx::NodeProto& node)
{
std::unordered_map<std::string, onnx::AttributeProto> result;
for(auto&& attr : node.attribute())
{
result[attr.name()] = attr;
}
return result;
}
static node_map get_nodes(const onnx::GraphProto& graph)
{
std::unordered_map<std::string, onnx::NodeProto> result;
for(auto&& node : graph.node())
{
result[node.name()] = node;
for(auto&& output : node.output())
{
result[output] = node;
}
}
return result;
}
static rtg::literal parse_value(const onnx::AttributeProto& attr)
{
switch(attr.type())
{
case onnx::AttributeProto::UNDEFINED: return {};
case onnx::AttributeProto::FLOAT: return rtg::literal{attr.f()};
case onnx::AttributeProto::INT: return rtg::literal{attr.i()};
case onnx::AttributeProto::STRING: return {};
case onnx::AttributeProto::TENSOR: return parse_tensor(attr.t());
case onnx::AttributeProto::GRAPH: return {};
case onnx::AttributeProto::FLOATS:
return rtg::literal{rtg::shape::float_type, attr.floats().begin(), attr.floats().end()};
case onnx::AttributeProto::INTS:
return rtg::literal{rtg::shape::int32_type, attr.ints().begin(), attr.ints().end()};
case onnx::AttributeProto::STRINGS: return {};
case onnx::AttributeProto::TENSORS: return {};
case onnx::AttributeProto::GRAPHS: return {};
}
RTG_THROW("Invalid attribute type");
}
static rtg::literal parse_tensor(const onnx::TensorProto& t)
{
std::vector<std::size_t> dims(t.dims().begin(), t.dims().end());
switch(t.data_type())
{
case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
case onnx::TensorProto::FLOAT:
return rtg::literal{
{rtg::shape::float_type, dims}, t.float_data().begin(), t.float_data().end()};
case onnx::TensorProto::UINT8: throw std::runtime_error("");
case onnx::TensorProto::INT8:
return rtg::literal{
{rtg::shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::UINT16:
return rtg::literal{
{rtg::shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::INT16:
return rtg::literal{
{rtg::shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::INT32:
return rtg::literal{
{rtg::shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::INT64:
return rtg::literal{
{rtg::shape::int64_type, dims}, t.int64_data().begin(), t.int64_data().end()};
case onnx::TensorProto::STRING: throw std::runtime_error("");
case onnx::TensorProto::BOOL:
return rtg::literal{
{rtg::shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
case onnx::TensorProto::FLOAT16: throw std::runtime_error("");
case onnx::TensorProto::DOUBLE:
return rtg::literal{
{rtg::shape::double_type, dims}, t.double_data().begin(), t.double_data().end()};
case onnx::TensorProto::UINT32: throw std::runtime_error("");
case onnx::TensorProto::UINT64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
}
RTG_THROW("Invalid tensor type");
}
static rtg::shape parse_type(const onnx::TypeProto& t)
{
rtg::shape::type_t shape_type{};
switch(t.tensor_type().elem_type())
{
case onnx::TensorProto::UNDEFINED:
break; // throw std::runtime_error("Unsupported type UNDEFINED");
case onnx::TensorProto::FLOAT: shape_type = rtg::shape::float_type; break;
case onnx::TensorProto::UINT8:
break; // throw std::runtime_error("Unsupported type UINT8");
case onnx::TensorProto::INT8: shape_type = rtg::shape::int8_type; break;
case onnx::TensorProto::UINT16: shape_type = rtg::shape::uint16_type; break;
case onnx::TensorProto::INT16: shape_type = rtg::shape::int16_type; break;
case onnx::TensorProto::INT32: shape_type = rtg::shape::int32_type; break;
case onnx::TensorProto::INT64: shape_type = rtg::shape::int64_type; break;
case onnx::TensorProto::STRING:
break; // throw std::runtime_error("Unsupported type STRING");
case onnx::TensorProto::BOOL:
break; // throw std::runtime_error("Unsupported type BOOL");
case onnx::TensorProto::FLOAT16:
break; // throw std::runtime_error("Unsupported type FLOAT16");
case onnx::TensorProto::DOUBLE: shape_type = rtg::shape::double_type; break;
case onnx::TensorProto::UINT32: shape_type = rtg::shape::uint32_type; break;
case onnx::TensorProto::UINT64: shape_type = rtg::shape::uint64_type; break;
case onnx::TensorProto::COMPLEX64:
break; // throw std::runtime_error("Unsupported type COMPLEX64");
case onnx::TensorProto::COMPLEX128:
break; // throw std::runtime_error("Unsupported type COMPLEX128");
}
std::vector<std::size_t> dims;
// TODO: Use std::transform
for(auto&& d : t.tensor_type().shape().dim())
{
dims.push_back(d.dim_value());
}
return {shape_type, dims};
}
};
#include <migraph/onnx.hpp>
int main(int argc, char const* argv[])
{
if(argc > 1)
{
std::string file = argv[1];
std::fstream input(file.c_str(), std::ios::in | std::ios::binary);
onnx_parser parser;
try
{
parser.parse_from(input);
}
catch(...)
{
std::cout << parser.prog << std::endl;
throw;
}
std::cout << parser.prog << std::endl;
auto prog = migraph::parse_onnx(file);
std::cout << prog << std::endl;
}
}
#include <migraph/onnx.hpp>
#include <migraph/cpu/cpu_target.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/generate.hpp>
#include <miopen/miopen.h>
#include <migraph/gpu/miopen.hpp>
migraph::argument run_cpu(std::string file)
{
auto p = migraph::parse_onnx(file);
p.compile(migraph::cpu::cpu_target{});
auto s = p.get_parameter_shape("Input3");
auto input3 = migraph::generate_argument(s);
auto out = p.eval({{"Input3", input3}});
std::cout << p << std::endl;
return out;
}
migraph::argument run_gpu(std::string file)
{
auto p = migraph::parse_onnx(file);
p.compile(migraph::cpu::cpu_target{});
auto s = p.get_parameter_shape("Input3");
auto input3 = migraph::gpu::to_gpu(migraph::generate_argument(s));
auto output = migraph::gpu::to_gpu(migraph::generate_argument(p.get_parameter_shape("output")));
auto handle = migraph::gpu::make_obj<migraph::gpu::miopen_handle>(&miopenCreate);
auto out = p.eval({{"Input3", input3}, {"output", output}});
std::cout << p << std::endl;
return migraph::gpu::from_gpu(out);
}
int main(int argc, char const* argv[])
{
if(argc > 1)
{
std::string file = argv[1];
auto x = run_cpu(file);
auto y = run_gpu(file);
if(x == y)
{
std::cout << "Passed" << std::endl;
}
else
{
std::cout << "Not equal" << std::endl;
std::cout << x << std::endl;
std::cout << y << std::endl;
}
}
}
#include <rtg/program.hpp>
#include <rtg/stringutils.hpp>
#include <rtg/instruction.hpp>
#include <migraph/program.hpp>
#include <migraph/stringutils.hpp>
#include <migraph/instruction.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
namespace rtg {
namespace migraph {
struct program_impl
{
// A list is used to keep references to an instruction stable
std::list<instruction> instructions;
context ctx;
};
const operation& get_operation(instruction_ref ins) { return ins->op; }
program::program() : impl(std::make_unique<program_impl>()) {}
program::program(program&&) noexcept = default;
@@ -28,11 +32,13 @@ program::insert_instruction(instruction_ref ins, operation op, std::vector<instr
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an exisiting instruction");
assert(not starts_with(op.name(), "@"));
// TODO: Use move
shape r = compute_shape(op, args);
auto result = impl->instructions.insert(ins, {op, r, args});
backreference(result);
assert(result->arguments == args);
assert(result->valid(begin()));
return result;
}
@@ -42,13 +48,66 @@ program::replace_instruction(instruction_ref ins, operation op, std::vector<inst
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an exisiting instruction");
assert(not starts_with(op.name(), "@"));
shape r = compute_shape(op, args);
ins->replace(op, r, args);
backreference(ins);
assert(ins->valid(begin()));
return ins;
}
instruction_ref program::replace_instruction(instruction_ref ins, instruction_ref rep)
{
assert(has_instruction(ins));
assert(has_instruction(rep));
assert(ins != rep);
// TODO: Should it be an error if the output is empty?
if(ins->output.empty())
{
return rep;
}
for(auto&& out : ins->output)
{
// TODO: Check for possible cycles
if(out != rep)
{
replace_argument(out, ins, rep);
}
assert(out->valid(begin()));
}
// Replacement should not be dead code unless its the last instruction
assert(!rep->output.empty() or rep == std::prev(end()));
assert(ins->valid(begin()));
assert(rep->valid(begin()));
return rep;
}
instruction_ref program::remove_instruction(instruction_ref ins)
{
assert(has_instruction(ins));
assert(ins->output.empty());
ins->clear_arguments();
return impl->instructions.erase(ins);
}
instruction_ref program::remove_instructions(instruction_ref first, instruction_ref last)
{
if(first == last)
return first;
// TODO: Check every element
assert(has_instruction(first));
std::for_each(first, last, [&](instruction& ins) { ins.clear_arguments(); });
assert(std::all_of(first, last, [&](instruction& ins) { return ins.output.empty(); }));
return impl->instructions.erase(first, last);
}
instruction_ref program::move_instruction(instruction_ref src, instruction_ref dst)
{
impl->instructions.splice(dst, impl->instructions, src);
return src;
}
instruction_ref program::add_literal(literal l)
{
impl->instructions.emplace_front(std::move(l));
@@ -67,7 +126,7 @@ instruction_ref program::add_parameter(std::string name, shape s)
return impl->instructions.begin();
}
shape program::get_parameter_shape(std::string name)
shape program::get_parameter_shape(std::string name) const
{
auto ins = std::find_if(
impl->instructions.begin(), impl->instructions.end(), [&](const instruction& x) {
@@ -86,6 +145,20 @@ shape program::get_parameter_shape(std::string name)
return {};
}
std::unordered_map<std::string, shape> program::get_parameter_shapes() const
{
std::unordered_map<std::string, shape> result;
for(auto&& ins : impl->instructions)
{
if(ins.op.name() == "@param")
{
auto&& name = any_cast<builtin::param>(ins.op).parameter;
result[name] = ins.result;
}
}
return result;
}
bool program::has_instruction(instruction_ref ins) const
{
return std::find_if(
@@ -94,27 +167,46 @@ bool program::has_instruction(instruction_ref ins) const
}) != impl->instructions.end();
}
instruction_ref program::begin() { return impl->instructions.begin(); }
instruction_ref program::end() { return impl->instructions.end(); }
instruction_ref program::begin() const { return impl->instructions.begin(); }
instruction_ref program::end() const { return impl->instructions.end(); }
shape program::get_shape() const { return impl->instructions.back().result; }
instruction_ref program::validate() const
{
return std::find_if(impl->instructions.begin(),
impl->instructions.end(),
[](const instruction& i) { return i.valid(); });
[&](const instruction& i) { return !i.valid(impl->instructions.begin()); });
}
void program::compile(const target& t)
{
assert(this->validate() != impl->instructions.end());
t.apply(*this);
if(this->validate() == impl->instructions.end())
RTG_THROW("Invalid program from compilation");
assert(this->validate() == impl->instructions.end());
this->impl->ctx = t.get_context();
for(auto&& p : t.get_passes(this->impl->ctx))
{
p.apply(*this);
#ifndef NDEBUG
auto invalid = this->validate();
if(invalid != impl->instructions.end())
{
auto index = std::distance(impl->instructions.begin(), invalid);
MIGRAPH_THROW(p.name() + " pass produces invalid program at instruction " +
std::to_string(index) + ": " + invalid->op.name());
}
#endif
}
auto invalid = this->validate();
if(invalid != impl->instructions.end())
{
auto index = std::distance(impl->instructions.begin(), invalid);
MIGRAPH_THROW("Invalid program from compilation at instruction " + std::to_string(index));
}
}
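// compile() applies the target's pass list against the stored context and,
// in debug builds, validates the program after every pass, reporting the
// index and op name of the first instruction a pass left in an invalid state.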
argument program::eval(std::unordered_map<std::string, argument> params) const
{
assert(this->validate() != impl->instructions.end());
assert(this->validate() == impl->instructions.end());
std::unordered_map<const instruction*, argument> results;
argument result;
for(auto& ins : impl->instructions)
@@ -138,13 +230,15 @@ argument program::eval(std::unordered_map<std::string, argument> params) const
ins.arguments.end(),
values.begin(),
[&](instruction_ref i) { return results.at(std::addressof(*i)); });
result = ins.op.compute(ins.result, values);
result = ins.op.compute(this->impl->ctx, ins.result, values);
}
results.emplace(std::addressof(ins), result);
}
return result;
}
bool operator==(const program& x, const program& y) { return to_string(x) == to_string(y); }
std::ostream& operator<<(std::ostream& os, const program& p)
{
std::unordered_map<const instruction*, std::string> names;
@@ -192,4 +286,4 @@ std::ostream& operator<<(std::ostream& os, const program& p)
return os;
}
} // namespace rtg
} // namespace migraph
#include <rtg/shape.hpp>
#include <rtg/stringutils.hpp>
#include <migraph/shape.hpp>
#include <migraph/stringutils.hpp>
#include <numeric>
#include <algorithm>
#include <functional>
#include <iostream>
namespace rtg {
namespace migraph {
shape::shape() : m_type(float_type), m_packed(false) {}
shape::shape() : m_type(float_type), m_standard(false) {}
shape::shape(type_t t) : m_type(t), m_lens({1}), m_strides({1}), m_packed(true) {}
shape::shape(type_t t, std::vector<std::size_t> l) : m_type(t), m_lens(std::move(l)), m_packed(true)
shape::shape(type_t t) : m_type(t), m_lens({1}), m_strides({1}), m_standard(true) {}
shape::shape(type_t t, std::vector<std::size_t> l)
: m_type(t), m_lens(std::move(l)), m_standard(true)
{
this->calculate_strides();
assert(m_lens.size() == m_strides.size());
@@ -19,7 +21,9 @@ shape::shape(type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s)
: m_type(t), m_lens(std::move(l)), m_strides(std::move(s))
{
assert(m_lens.size() == m_strides.size());
m_packed = this->elements() == this->element_space();
assert(std::any_of(m_strides.begin(), m_strides.end(), [](auto x) { return x > 0; }) and
"At least one stride must be non-zero");
m_standard = this->packed() and not this->transposed();
}
void shape::calculate_strides()
@@ -39,6 +43,8 @@ const std::vector<std::size_t>& shape::strides() const { return this->m_strides;
std::size_t shape::elements() const
{
assert(this->lens().size() == this->strides().size());
if(this->lens().empty())
return 0;
return std::accumulate(
this->lens().begin(), this->lens().end(), std::size_t{1}, std::multiplies<std::size_t>());
}
@@ -63,27 +69,48 @@ std::size_t shape::index(const std::vector<std::size_t>& l) const
std::size_t shape::index(std::size_t i) const
{
assert(this->lens().size() == this->strides().size());
return std::inner_product(
this->lens().begin(),
this->lens().end(),
this->strides().begin(),
std::size_t{0},
std::plus<std::size_t>{},
[&](std::size_t len, std::size_t stride) { return ((i / stride) % len) * stride; });
if(this->standard())
return i;
else
return std::inner_product(this->lens().begin(),
this->lens().end(),
this->strides().begin(),
std::size_t{0},
std::plus<std::size_t>{},
[&](std::size_t len, std::size_t stride) {
assert(stride > 0 and len > 0);
return ((i / stride) % len) * stride;
});
}
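// With the standard() fast path, index(i) is simply the identity for shapes
// that are packed and not transposed; only non-standard layouts pay for the
// divide/modulo mapping from a flat element index to a strided offset.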
bool shape::packed() const { return this->m_packed; }
bool shape::packed() const { return this->elements() == this->element_space(); }
bool shape::transposed() const
{
return not std::is_sorted(this->strides().rbegin(), this->strides().rend());
}
bool shape::broadcasted() const
{
assert(this->lens().size() == this->strides().size());
return std::accumulate(this->strides().begin(),
this->strides().end(),
std::size_t{1},
std::multiplies<std::size_t>()) == 0;
}
bool shape::standard() const { return this->m_standard; }
std::size_t shape::element_space() const
{
// TODO: Get rid of intermediate vector
assert(this->lens().size() == this->strides().size());
std::vector<std::size_t> max_indices(this->lens().size());
std::transform(this->lens().begin(),
this->lens().end(),
std::vector<std::size_t>(this->lens().size(), 1).begin(),
max_indices.begin(),
std::minus<std::size_t>());
return std::inner_product(
max_indices.begin(), max_indices.end(), this->strides().begin(), std::size_t{0}) +
if(this->lens().empty())
return 0;
return std::inner_product(this->lens().begin(),
this->lens().end(),
this->strides().begin(),
std::size_t{0},
std::plus<std::size_t>{},
[](std::size_t l, std::size_t s) { return (l - 1) * s; }) +
1;
}
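// Worked example for element_space(): lens {2, 3} with strides {3, 1} gives
// (2 - 1) * 3 + (3 - 1) * 1 + 1 = 6 addressable elements, matching elements()
// for this packed layout, while a broadcast stride of 0 shrinks the result.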
@@ -91,13 +118,12 @@ std::string shape::type_string() const
{
switch(this->m_type)
{
case any_type: return "any";
#define RTG_SHAPE_TYPE_STRING_CASE(x, t) \
#define MIGRAPH_SHAPE_TYPE_STRING_CASE(x, t) \
case x: return #x;
RTG_SHAPE_VISIT_TYPES(RTG_SHAPE_TYPE_STRING_CASE)
#undef RTG_SHAPE_TYPE_STRING_CASE
MIGRAPH_SHAPE_VISIT_TYPES(MIGRAPH_SHAPE_TYPE_STRING_CASE)
#undef MIGRAPH_SHAPE_TYPE_STRING_CASE
}
RTG_THROW("Invalid type");
MIGRAPH_THROW("Invalid type");
}
bool operator==(const shape& x, const shape& y)
@@ -109,9 +135,9 @@ bool operator!=(const shape& x, const shape& y) { return !(x == y); }
std::ostream& operator<<(std::ostream& os, const shape& x)
{
os << x.type_string() << ", ";
os << "{" << to_string(x.lens()) << "}, ";
os << "{" << to_string(x.strides()) << "}";
os << "{" << to_string_range(x.lens()) << "}, ";
os << "{" << to_string_range(x.strides()) << "}";
return os;
}
} // namespace rtg
} // namespace migraph
#include <migraph/simplify_reshapes.hpp>
#include <migraph/program.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/ranges.hpp>
#include <unordered_set>
namespace migraph {
bool is_reshaper(const std::string& name)
{
// clang-format off
static const std::unordered_set<std::string> names = {
"reshape",
"transpose",
// "broadcast",
"contiguous"
};
// clang-format on
return contains(names, name);
}
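// simplify_reshapes scans each chain of reshape/transpose/contiguous
// instructions that feeds a non-reshaper and, when two instructions in the
// chain produce the same shape, rewires the users of the later one onto the
// earlier one so the redundant reshapes in between become dead code.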
void simplify_reshapes::apply(program& p) const
{
for(auto ins : iterator_for(p))
{
if(not is_reshaper(ins->op.name()))
continue;
if(ins->output.size() != 1)
continue;
if(is_reshaper(ins->output.front()->op.name()))
continue;
// Gather reshapes
std::vector<instruction_ref> reshapes{ins};
while(is_reshaper(reshapes.back()->op.name()))
{
assert(!reshapes.back()->arguments.empty());
assert(p.has_instruction(reshapes.back()->arguments.front()));
reshapes.push_back(reshapes.back()->arguments.front());
}
std::pair<instruction_ref, instruction_ref> r{p.end(), p.end()};
for(auto start : iterator_for(reshapes))
{
auto last = std::find_if(reshapes.rbegin(), reshapes.rend(), [&](auto&& i) {
return i->result == (*start)->result and i != (*start);
});
if(last != reshapes.rend())
{
r = std::make_pair(*start, *last);
break;
}
}
if(r.first != r.second)
{
p.replace_instruction(r.first, r.second);
}
}
}
} // namespace migraph
add_library(rtg_cpu
add_library(migraph_cpu
cpu_target.cpp
cpu_lowering.cpp
gemm.cpp
)
rocm_clang_tidy_check(rtg_cpu)
target_link_libraries(rtg_cpu rtg)
target_include_directories(rtg_cpu PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
find_path(BLAZE_INCLUDE blaze/Blaze.h)
find_package(Threads)
rocm_clang_tidy_check(migraph_cpu)
target_link_libraries(migraph_cpu migraph Threads::Threads)
target_include_directories(migraph_cpu PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraph_cpu PRIVATE ${BLAZE_INCLUDE})
target_compile_definitions(migraph_cpu PRIVATE -DBLAZE_USE_CPP_THREADS)
#include <migraph/cpu/cpu_lowering.hpp>
#include <migraph/instruction.hpp>
#include <migraph/dfor.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/cpu/gemm.hpp>
#include <unordered_map>
namespace migraph {
namespace cpu {
template <typename T>
T zero(const T&)
{
return T(0);
}
//
// cpu implementation of batch norm for inference
//
// inputs are:
// args[0] -> input data buffer
// args[1] -> mini batch mean
// args[2] -> mini batch variance
// args[3] -> gamma
// args[4] -> bias
//
// The equation to compute batch norm for inference is:
//
// output[i] = bias + gamma * (input[i] - mean) / sqrt(variance + epsilon)
//
// the input data format should be nchw
//
struct cpu_batch_norm_inference
{
batch_norm_inference op;
std::string name() const { return "cpu::batch_norm_inference"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument output{output_shape};
double epsilon = op.epsilon;
auto input = args[0];
auto mini_batch_mean = args[1];
auto mini_batch_variance = args[2];
auto arg_gamma = args[3];
auto arg_bias = args[4];
auto num_batch = output_shape.lens()[0];
auto num_channels = output_shape.lens()[1];
auto image_height = output_shape.lens()[2];
auto image_width = output_shape.lens()[3];
if(op.bn_mode == batch_norm_inference::spatial)
{
visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
[&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
dfor(num_batch, num_channels, image_height, image_width)(
[&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
result(n, c, h, w) = gamma(c) * (buffer(n, c, h, w) - mean(c)) /
std::sqrt(variance(c) + epsilon) +
bias(c);
});
});
}
if(op.bn_mode == batch_norm_inference::per_activation)
{
visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
[&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
dfor(num_batch, num_channels, image_height, image_width)(
[&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
result(n, c, h, w) = gamma(c, h, w) *
(buffer(n, c, h, w) - mean(c, h, w)) /
std::sqrt(variance(c, h, w) + epsilon) +
bias(c, h, w);
});
});
}
return output;
}
};
struct cpu_convolution
{
convolution op;
std::string name() const { return "cpu::convolution"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
auto in_h = input.get_shape().lens()[2];
auto in_w = input.get_shape().lens()[3];
auto wei_c = weights.get_shape().lens()[1];
auto wei_h = weights.get_shape().lens()[2];
auto wei_w = weights.get_shape().lens()[3];
dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
double acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
}
});
output(o, w, i, j) = acc;
});
});
return result;
}
};
struct max_pool
{
static std::string name() { return "max"; }
static double start() { return std::numeric_limits<double>::lowest(); }
static double apply(double x, double y)
{
double m = std::max(x, y);
return (m);
}
static double final(double x, double) { return (x); }
};
struct avg_pool
{
static std::string name() { return "average"; }
static double start() { return 0.0; }
static double apply(double x, double y) { return x + y; }
static double final(double x, double y) { return x / y; }
};
template <class Op>
struct cpu_pooling
{
pooling op;
std::string name() const { return "cpu::pooling_" + Op::name(); }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
using type = typename decltype(output)::value_type;
auto in_h = input.get_shape().lens()[2];
auto in_w = input.get_shape().lens()[3];
dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x0 = i * op.stride[0] - op.padding[0];
const int start_y0 = j * op.stride[1] - op.padding[1];
const int hend = std::min(start_x0 + op.lengths[0], in_h);
const int wend = std::min(start_y0 + op.lengths[1], in_w);
const int start_x = std::max(start_x0, 0);
const int start_y = std::max(start_y0, 0);
const int w_h = (hend - start_x);
const int w_w = (wend - start_y);
const int pool_size = std::max(w_h * w_w, 1);
double acc = Op::start();
dfor(w_h, w_w)([&](int x, int y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc = Op::apply(acc, input(o, w, in_x, in_y));
}
});
output(o, w, i, j) = type(Op::final(acc, pool_size));
});
});
return result;
}
};
struct cpu_contiguous
{
contiguous op;
std::string name() const { return "cpu::contiguous"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
});
});
return result;
}
};
struct cpu_gemm
{
gemm op;
std::string name() const { return "cpu::gemm"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
migemm(result, args[0], args[1], op.alpha, op.beta);
return result;
}
};
struct identity_op
{
std::string name() const { return "cpu::identity"; }
auto fcn() const
{
return [](auto x) { return x; };
}
};
struct abs_op
{
std::string name() const { return "cpu::abs"; }
auto fcn() const
{
return [](auto x) { return std::abs(x); };
}
};
struct exp_op
{
std::string name() const { return "cpu::exp"; }
auto fcn() const
{
return [](auto x) { return std::exp(x); };
}
};
struct sin_op
{
std::string name() const { return "cpu::sin"; }
auto fcn() const
{
return [](auto x) { return std::sin(x); };
}
};
struct cos_op
{
std::string name() const { return "cpu::cos"; }
auto fcn() const
{
return [](auto x) { return std::cos(x); };
}
};
struct tan_op
{
std::string name() const { return "cpu::tan"; }
auto fcn() const
{
return [](auto x) { return std::tan(x); };
}
};
struct asin_op
{
std::string name() const { return "cpu::asin"; }
auto fcn() const
{
return [](auto x) { return std::asin(x); };
}
};
struct acos_op
{
std::string name() const { return "cpu::acos"; }
auto fcn() const
{
return [](auto x) { return std::acos(x); };
}
};
struct atan_op
{
std::string name() const { return "cpu::atan"; }
auto fcn() const
{
return [](auto x) { return std::atan(x); };
}
};
struct tanh_op
{
std::string name() const { return "cpu::tanh"; }
auto fcn() const
{
return [](auto x) { return std::tanh(x); };
}
};
struct sigmoid_op
{
std::string name() const { return "cpu::sigmoid"; }
auto fcn() const
{
return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
}
};
struct neg_op
{
std::string name() const { return "cpu::neg"; }
auto fcn() const
{
return [](auto x) { return -x; };
}
};
struct relu_op
{
std::string name() const { return "cpu::relu"; }
auto fcn() const
{
return [](auto x) { return x > 0 ? x : 0; };
}
};
template <typename Op>
struct cpu_unary
{
Op op;
std::string name() const { return op.name(); }
shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
args[0].visit([&](auto input) {
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
});
});
return result;
}
};
struct softmax2d
{
std::string name() const { return "cpu::softmax2d"; }
shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
using value_type = typename decltype(input)::value_type;
auto nb = input.get_shape().lens()[0];
auto nc = input.get_shape().lens()[1];
auto nh = input.get_shape().lens()[2];
auto nw = input.get_shape().lens()[3];
dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
value_type cmax = std::numeric_limits<value_type>::lowest();
for(int c = 0; c < nc; c++)
{
cmax = std::max(cmax, input(b, c, i, j));
}
for(int c = 0; c < nc; c++)
{
output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
}
value_type sum = value_type(0);
for(int c = 0; c < nc; c++)
{
sum += output(b, c, i, j);
}
for(int c = 0; c < nc; c++)
{
output(b, c, i, j) = output(b, c, i, j) / sum;
}
});
});
return result;
}
};
struct add_op
{
std::string name() const { return "add"; }
auto fcn() const
{
return [](auto x, auto y) { return x + y; };
}
};
struct sub_op
{
std::string name() const { return "sub"; }
auto fcn() const
{
return [](auto x, auto y) { return x - y; };
}
};
struct mul_op
{
std::string name() const { return "mul"; }
auto fcn() const
{
return [](auto x, auto y) { return x * y; };
}
};
struct div_op
{
std::string name() const { return "div"; }
auto fcn() const
{
return [](auto x, auto y) { return x / y; };
}
};
template <typename Op>
struct cpu_binary
{
Op op;
std::string name() const { return op.name(); }
shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
if(input1.get_shape().packed() and input2.get_shape().packed())
{
std::transform(
input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
}
else
{
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) =
op.fcn()(input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
});
}
});
return result;
}
};
struct cpu_apply
{
program* prog;
std::unordered_map<std::string, std::function<void(instruction_ref)>> apply_map{};
template <class T>
auto simple_op()
{
return [this](instruction_ref ins) { apply_simple_op<T>(ins); };
}
template <class T, class Op>
auto extend_op()
{
return [this](instruction_ref ins) { apply_extend_op<T, Op>(ins); };
}
void init()
{
apply_map["convolution"] = extend_op<cpu_convolution, convolution>();
apply_map["gemm"] = extend_op<cpu_gemm, gemm>();
apply_map["batch_norm_inference"] =
extend_op<cpu_batch_norm_inference, batch_norm_inference>();
apply_map["contiguous"] = extend_op<cpu_contiguous, contiguous>();
apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
apply_map["tanh"] = simple_op<cpu_unary<tanh_op>>();
apply_map["sigmoid"] = simple_op<cpu_unary<sigmoid_op>>();
apply_map["exp"] = simple_op<cpu_unary<exp_op>>();
apply_map["neg"] = simple_op<cpu_unary<neg_op>>();
apply_map["sin"] = simple_op<cpu_unary<sin_op>>();
apply_map["cos"] = simple_op<cpu_unary<cos_op>>();
apply_map["tan"] = simple_op<cpu_unary<tan_op>>();
apply_map["add"] = simple_op<cpu_binary<add_op>>();
apply_map["sub"] = simple_op<cpu_binary<sub_op>>();
apply_map["mul"] = simple_op<cpu_binary<mul_op>>();
apply_map["div"] = simple_op<cpu_binary<div_op>>();
apply_map["softmax"] = simple_op<softmax2d>();
}
void apply()
{
init();
for(auto it : iterator_for(*prog))
{
if(it->op.name() == "activation")
{
apply_activation(it);
}
else if(it->op.name() == "pooling")
{
apply_pooling(it);
}
else if(apply_map.count(it->op.name()) > 0)
{
apply_map.at(it->op.name())(it);
}
}
}
template <class T>
void apply_simple_op(instruction_ref ins)
{
prog->replace_instruction(ins, T{}, ins->arguments);
}
template <class T, class Op>
void apply_extend_op(instruction_ref ins)
{
auto&& op = any_cast<Op>(ins->op);
prog->replace_instruction(ins, T{op}, ins->arguments);
}
void apply_activation(instruction_ref ins)
{
auto&& op = any_cast<activation>(ins->op);
if(op.mode == "relu")
prog->replace_instruction(ins, cpu_unary<relu_op>{}, ins->arguments);
}
void apply_pooling(instruction_ref ins)
{
auto&& op = any_cast<pooling>(ins->op);
if(op.mode == "max")
prog->replace_instruction(ins, cpu_pooling<max_pool>{op}, ins->arguments);
else if(op.mode == "average")
prog->replace_instruction(ins, cpu_pooling<avg_pool>{op}, ins->arguments);
}
};
void cpu_lowering::apply(program& p) const { cpu_apply{&p}.apply(); }
} // namespace cpu
} // namespace migraph
#include <rtg/cpu/cpu_target.hpp>
#include <rtg/instruction.hpp>
#include <rtg/dfor.hpp>
#include <rtg/operators.hpp>
#include <migraph/cpu/cpu_target.hpp>
#include <migraph/cpu/cpu_lowering.hpp>
namespace rtg {
namespace migraph {
namespace cpu {
struct cpu_convolution
{
convolution op;
std::string name() const { return "cpu::convolution"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
auto in_h = input.get_shape().lens()[2];
auto in_w = input.get_shape().lens()[3];
auto wei_c = weights.get_shape().lens()[1];
auto wei_h = weights.get_shape().lens()[2];
auto wei_w = weights.get_shape().lens()[3];
dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x = i * op.stride[0] - op.padding[0];
const int start_y = j * op.stride[1] - op.padding[1];
double acc = 0;
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
}
});
output(o, w, i, j) = acc;
});
});
return result;
}
};
struct relu
{
std::string name() const { return "cpu::relu"; }
shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
argument compute(shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
args[0].visit([&](auto input) {
std::transform(input.begin(), input.end(), output.begin(), [](auto x) {
return x > 0 ? x : 0;
});
});
});
return result;
}
};
struct cpu_apply
{
program* prog;
void apply()
{
for(auto it = prog->begin(); it != prog->end(); it++)
{
if(it->op.name() == "convolution")
{
apply_convolution(it);
}
else if(it->op.name() == "activation")
{
apply_activation(it);
}
}
}
void apply_convolution(instruction_ref ins)
{
auto&& op = any_cast<convolution>(ins->op);
prog->replace_instruction(ins, cpu_convolution{op}, ins->arguments);
}
void apply_activation(instruction_ref ins)
{
auto&& op = any_cast<activation>(ins->op);
if(op.mode == "relu")
prog->replace_instruction(ins, relu{}, ins->arguments);
}
};
std::string cpu_target::name() const { return "cpu"; }
void cpu_target::apply(program& p) const { cpu_apply{&p}.apply(); }
} // namespace cpu
} // namespace rtg
#include <migraph/cpu/gemm.hpp>
#include <migraph/dfor.hpp>
#include <migraph/requires.hpp>
#include <blaze/math/CustomMatrix.h>
namespace migraph {
namespace cpu {
template <class T>
using matrix = blaze::CustomMatrix<T, blaze::unaligned, blaze::unpadded>; // NOLINT
template <class T>
static auto make_mat(tensor_view<T> x)
{
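// Adapt a 2-d tensor_view to a Blaze CustomMatrix. Transposed views are created with
// swapped dimensions and re-transposed in visit_mat before use.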
const auto& s = x.get_shape();
assert(s.lens().size() == 2);
if(s.transposed())
return matrix<T>{x.data(), s.lens()[1], s.lens()[0], s.strides()[1]};
return matrix<T>{x.data(), s.lens()[0], s.lens()[1], s.strides()[0]};
}
template <class T, class F>
static void visit_mat(tensor_view<T> x, F f)
{
auto mat = make_mat(x);
if(x.get_shape().transposed())
f(blaze::trans(mat));
else
f(mat);
}
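// Trait used for tag dispatch in migemm_impl: float takes the Blaze fast path,
// every other element type falls back to the reference loop.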
template <class T>
struct is_fast_gemm_type : std::false_type
{
};
template <>
struct is_fast_gemm_type<float> : std::true_type
{
};
template <class T>
void migemm_impl(tensor_view<T> cmat,
tensor_view<T> amat,
tensor_view<T> bmat,
float alpha,
float beta,
std::true_type)
{
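// Fast path: view A, B and C as Blaze matrices and evaluate C = alpha * (A * B) + beta * C.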
visit_mat(amat, [&](const auto& a) {
visit_mat(bmat, [&](const auto& b) {
auto c = make_mat(cmat);
c = (a * b) * alpha + beta * c;
});
});
}
template <class T>
void migemm_impl(tensor_view<T> cmat,
tensor_view<T> amat,
tensor_view<T> bmat,
float alpha,
float beta,
std::false_type)
{
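// Reference path: triple loop computing C = alpha * (A * B) + beta * C for element
// types without a Blaze fast path.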
auto m = cmat.get_shape().lens()[0];
auto n = cmat.get_shape().lens()[1];
auto k = amat.get_shape().lens()[1];
assert(amat.get_shape().lens()[1] == bmat.get_shape().lens()[0]);
assert(m == amat.get_shape().lens()[0]);
assert(n == bmat.get_shape().lens()[1]);
dfor(m, n)([&](auto ii, auto jj) {
double s = 0.0;
dfor(k)([&](auto kk) { s += amat(ii, kk) * bmat(kk, jj); });
cmat(ii, jj) = alpha * s + beta * cmat(ii, jj);
});
}
template <class T>
void migemm_impl(
tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, float alpha, float beta)
{
migemm_impl(cmat, amat, bmat, alpha, beta, is_fast_gemm_type<T>{});
}
void migemm(
const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta)
{
visit_all(c_arg, a_arg, b_arg)(
[&](auto cmat, auto amat, auto bmat) { migemm_impl(cmat, amat, bmat, alpha, beta); });
}
} // namespace cpu
} // namespace migraph
#ifndef MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPH_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <migraph/program.hpp>
namespace migraph {
namespace cpu {
struct cpu_lowering
{
std::string name() const { return "cpu::lowering"; }
void apply(program& p) const;
};
} // namespace cpu
} // namespace migraph
#endif
#ifndef MIGRAPH_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#define MIGRAPH_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#include <migraph/program.hpp>
namespace migraph {
namespace cpu {
struct cpu_target
{
std::string name() const;
std::vector<pass> get_passes(context& ctx) const;
context get_context() const { return {}; }
};
} // namespace cpu
} // namespace migraph
#endif
#ifndef MIGRAPH_GUARD_RTGLIB_CPU_GEMM_HPP
#define MIGRAPH_GUARD_RTGLIB_CPU_GEMM_HPP
#include <migraph/argument.hpp>
namespace migraph {
namespace cpu {
void migemm(
const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
} // namespace cpu
} // namespace migraph
#endif
#ifndef RTG_GUARD_RTGLIB_CPU_TARGET_HPP
#define RTG_GUARD_RTGLIB_CPU_TARGET_HPP
#include <rtg/program.hpp>
namespace rtg {
namespace cpu {
struct cpu_target
{
std::string name() const;
void apply(program& p) const;
};
} // namespace cpu
} // namespace rtg
#endif
list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc)
find_package(miopen)
# rocblas
find_package(rocblas REQUIRED PATHS /opt/rocm)
message(STATUS "Build with rocblas")
if(NOT TARGET MIOpen)
message(SEND_ERROR "Can't find MIOpen")
endif()
add_library(migraph_device
hip_contiguous.cpp
)
rocm_clang_tidy_check(migraph_device)
target_link_libraries(migraph_device migraph hip::device)
target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
add_library(migraph_gpu
hip.cpp
target.cpp
lowering.cpp
write_literals.cpp
rocblas.cpp
)
rocm_clang_tidy_check(migraph_gpu)
target_link_libraries(migraph_gpu migraph MIOpen migraph_device roc::rocblas)
target_include_directories(migraph_gpu PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
#include <migraph/gpu/hip.hpp>
#include <migraph/manage_ptr.hpp>
#include <miopen/miopen.h>
#include <vector>
namespace migraph {
namespace gpu {
using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }
hip_ptr allocate_gpu(std::size_t sz)
{
void* result;
auto status = hipMalloc(&result, sz);
if(status != hipSuccess)
MIGRAPH_THROW("Gpu allocation failed: " + hip_error(status));
return hip_ptr{result};
}
// Forward declaration of the raw-pointer overload defined below, so the templated
// overload can resolve the call when it is instantiated.
hip_ptr write_to_gpu(const void* x, std::size_t sz);
template <class T>
hip_ptr write_to_gpu(const T& x)
{
using type = typename T::value_type;
auto size = x.size() * sizeof(type);
return write_to_gpu(x.data(), size);
}
template <class T>
std::vector<T> read_from_gpu(const void* x, std::size_t sz)
{
std::vector<T> result(sz);
auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
if(status != hipSuccess)
MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status));
return result;
}
hip_ptr write_to_gpu(const void* x, std::size_t sz)
{
auto result = allocate_gpu(sz);
auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
if(status != hipSuccess)
MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
return result;
}
argument allocate_gpu(shape s)
{
auto p = share(allocate_gpu(s.bytes() + 1));
return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}
argument to_gpu(argument arg)
{
auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes()));
return {arg.get_shape(), [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}
argument from_gpu(argument arg)
{
argument result;
arg.visit([&](auto x) {
using type = typename decltype(x)::value_type;
auto v = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
result = {x.get_shape(), [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
});
return result;
}
} // namespace gpu
} // namespace migraph
#include <hip/hip_runtime.h>
#include <migraph/operators.hpp>
namespace migraph {
namespace gpu {
struct index
{
std::size_t global;
std::size_t local;
std::size_t group;
};
template <class F>
__global__ void launcher(F f)
{
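// Each thread builds its flat global id plus the local (within-block) and block indices.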
index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
f(idx);
}
auto launch(std::size_t global, std::size_t local)
{
return [=](auto f) {
assert(local > 0);
assert(global > 0);
using f_type = decltype(f);
dim3 nblocks(global / local);
dim3 nthreads(local);
hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, nullptr, f);
};
}
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
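// Dispatch a runtime tensor rank (1-5) to a compile-time integral_constant so the
// caller can instantiate templates per dimension.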
switch(n)
{
case 1:
{
f(std::integral_constant<std::size_t, 1>{});
break;
}
case 2:
{
f(std::integral_constant<std::size_t, 2>{});
break;
}
case 3:
{
f(std::integral_constant<std::size_t, 3>{});
break;
}
case 4:
{
f(std::integral_constant<std::size_t, 4>{});
break;
}
case 5:
{
f(std::integral_constant<std::size_t, 5>{});
break;
}
default: throw std::runtime_error("Unknown tensor size");
}
}
template <size_t NDim>
struct hip_index
{
size_t d[NDim];
__device__ __host__ size_t& operator[](size_t i) { return d[i]; }
__device__ __host__ size_t operator[](size_t i) const { return d[i]; }
};
template <size_t NDim>
struct hip_tensor_descriptor
{
__device__ __host__ hip_tensor_descriptor() = default;
template <typename T, typename V>
__device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
{
for(size_t i = 0; i < NDim; i++)
lens[i] = lens_ext[i];
for(size_t i = 0; i < NDim; i++)
strides[i] = strides_ext[i];
}
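// Convert a linear element index into a multi-dimensional index by successive
// division/modulo with the strides.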
__device__ __host__ hip_index<NDim> multi(size_t idx)
{
hip_index<NDim> result{};
size_t tidx = idx;
for(size_t is = 0; is < NDim; is++)
{
result[is] = tidx / strides[is];
tidx = tidx % strides[is];
}
return result;
}
__device__ __host__ size_t linear(hip_index<NDim> s)
{
size_t idx = 0;
for(size_t i = 0; i < NDim; i++)
idx += s[i] * strides[i];
return idx;
}
size_t lens[NDim] = {};
size_t strides[NDim] = {};
};
void hip_contiguous(migraph::shape output_shape, migraph::argument arg, migraph::argument result)
{
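// Copy an arbitrarily strided tensor into a densely packed output: each GPU thread
// walks the elements in a grid-stride loop, mapping the packed index back through
// the input's strides to gather the source element.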
visit_all(result, arg)([&](auto output, auto input) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto& s = arg.get_shape();
hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
auto* a = input.data();
auto* at = output.data();
auto nelements = s.elements();
std::size_t nlocal = 512;
std::size_t nglobal = 512 * nlocal;
launch(nglobal, nlocal)([=](auto idx) mutable {
for(size_t i = idx.global; i < nelements; i += nglobal)
{
size_t lidx = a_desc.linear(at_desc.multi(i));
at[i] = a[lidx];
}
});
});
});
}
} // namespace gpu
} // namespace migraph
#ifndef MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
#define MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/rocblas.hpp>
namespace migraph {
namespace gpu {
struct context
{
shared<miopen_handle> handle;
shared<rocblas_handle_ptr> rbhandle;
};
} // namespace gpu
} // namespace migraph
#endif