Commit 98cd353f authored by Shucai Xiao's avatar Shucai Xiao
Browse files

Merge branch 'gather_operator' into seq2seq_example

parents 4702c17e c21a1b28
......@@ -145,6 +145,7 @@ rocm_create_package(
DESCRIPTION "AMD's graph optimizer"
MAINTAINER "Paul Fultz II <paul.fultz@amd.com>"
LDCONFIG
PTH
DEPENDS miopen-hip rocblas hip_hcc half
)
......
......@@ -107,6 +107,10 @@ rocmtest tidy: rocmnode('rocmtest') { cmake_build ->
stage('Clang Release') {
cmake_build("hcc", "-DCMAKE_BUILD_TYPE=release")
}
stage('Clang Release Python 3') {
cmake_build("hcc", "-DCMAKE_BUILD_TYPE=release -DPYTHON_EXECUTABLE=/usr/local/bin/python3")
}
}, gcc5: rocmnode('rocmtest') { cmake_build ->
stage('GCC 5 Debug') {
cmake_build("g++-5", "-DCMAKE_BUILD_TYPE=debug")
......
......@@ -757,43 +757,81 @@ struct gather
// negative axis means counting dimensions from back
int axis_index = (axis < 0) ? (n_dim + axis) : axis;
auto type = inputs[0].type();
lens[axis_index] = inputs[1].elements();
return {type, lens};
}
auto type = inputs[0].type();
lens.erase(lens.begin() + axis_index);
if(!inputs[1].scalar())
{
auto ind_lens = inputs[1].lens();
lens.insert(lens.begin() + axis_index, ind_lens.begin(), ind_lens.end());
}
template <class T>
void compute_index(const T& out_idx,
const int axis_index,
const std::vector<std::size_t>& vec_indices,
const std::size_t max_dim,
T& in_idx) const
{
in_idx = out_idx;
std::size_t idx = vec_indices.at(out_idx[axis_index]);
if(idx >= max_dim)
// for scalar output
if(lens.size() == 0)
{
MIGRAPHX_THROW("Gather: indices are out of range in input tensor");
return {type, {1}, {0}};
}
in_idx[axis_index] = idx;
return {type, lens};
}
// template <class T>
// void compute_index(const T& out_idx,
// const int axis_index,
// const std::vector<std::size_t>& vec_indices,
// const std::size_t max_dim,
// T& in_idx) const
// {
// in_idx = out_idx;
// std::size_t idx = vec_indices.at(out_idx[axis_index]);
// if(idx >= max_dim)
// {
// MIGRAPHX_THROW("Gather: indices are out of range in input tensor");
// }
// in_idx[axis_index] = idx;
// }
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// negative axis means counting dimensions from back
int axis_index = (axis < 0) ? (output_shape.lens().size() + axis) : axis;
int axis_index = (axis < 0) ? (args[0].get_shape().lens().size() + axis) : axis;
// max dimension in axis
std::size_t max_dim = args[0].get_shape().lens()[axis_index];
std::vector<std::size_t> vec_indices;
args[1].visit([&](auto indices) { vec_indices.assign(indices.begin(), indices.end()); });
visit_all(result, args[0])([&](auto output, auto input) {
std::vector<std::size_t> in_idx;
shape_for_each(output.get_shape(), [&](const auto& idx) {
this->compute_index(idx, axis_index, vec_indices, max_dim, in_idx);
output(idx.begin(), idx.end()) = input(in_idx.begin(), in_idx.end());
// std::size_t max_dim = args[0].get_shape().lens()[axis_index];
// std::vector<std::size_t> vec_indices;
// args[1].visit([&](auto indices) { vec_indices.assign(indices.begin(), indices.end()); });
visit_all(result, args[0])([&](auto output, auto data) {
args[1].visit([&](auto indices) {
if(indices.get_shape().scalar())
{
if(output_shape.scalar())
{
output[0] = data[indices.front()];
}
else
{
shape_for_each(output.get_shape(), [&](const auto& out_idx) {
auto data_idx = out_idx;
data_idx.insert(data_idx.begin() + axis_index, indices.front());
output(out_idx.begin(), out_idx.end()) =
data(data_idx.begin(), data_idx.end());
});
}
}
else
{
auto ind_lens = indices.get_shape().lens();
shape_for_each(output.get_shape(), [&](const auto& out_idx) {
auto data_idx = out_idx;
auto start_it = data_idx.begin() + axis_index;
auto end_it = data_idx.begin() + axis_index + ind_lens.size();
std::vector<std::size_t> ind_idx(start_it, end_it);
data_idx.erase(start_it, end_it);
data_idx.insert(start_it, indices(ind_idx.begin(), ind_idx.end()));
output(out_idx.begin(), out_idx.end()) =
data(data_idx.begin(), data_idx.end());
});
}
});
});
......
......@@ -124,6 +124,8 @@ struct tensor_view
return m_data + this->size();
}
std::vector<T> to_vector() const { return std::vector<T>(this->begin(), this->end()); }
friend std::ostream& operator<<(std::ostream& os, const tensor_view<T>& x)
{
if(!x.empty())
......
......@@ -434,7 +434,15 @@ struct onnx_parser
attribute_map attributes,
const std::vector<instruction_ref>&)
{
literal v = parse_value(attributes.at("value"));
literal v = parse_value(attributes.at("value"));
auto dim_size = attributes.at("value").t().dims_size();
// if dim_size is 0, it is a scalar
if(dim_size == 0)
{
migraphx::shape scalar_shape{v.get_shape().type(), {1}, {0}};
return prog.add_literal(migraphx::literal{scalar_shape, v.data()});
}
return prog.add_literal(v);
}
......@@ -461,6 +469,18 @@ struct onnx_parser
{
transb = parse_value(attributes.at("transB")).at<bool>();
}
// beginning or end of both args have dimension 1, need to squeeze
// before calling gemm, then doing unsqueeze after getting results
std::size_t num_squeeze = args[0]->get_shape().lens().size();
if(num_squeeze > 2)
{
std::vector<int64_t> vec_axises(num_squeeze - 2);
std::iota(vec_axises.begin(), vec_axises.end(), 0);
args[0] = prog.add_instruction(op::squeeze{vec_axises}, args[0]);
args[1] = prog.add_instruction(op::squeeze{vec_axises}, args[1]);
}
std::vector<int64_t> perm = {1, 0};
auto l1 = (transa) ? prog.add_instruction(op::transpose{perm}, args[0]) : args[0];
auto l2 = (transb) ? prog.add_instruction(op::transpose{perm}, args[1]) : args[1];
......@@ -469,6 +489,13 @@ struct onnx_parser
if(beta != 0.f)
{
auto l3 = prog.add_instruction(op::dot{alpha}, l1, l2);
if(num_squeeze > 2)
{
std::vector<int64_t> vec_axises(num_squeeze - 2);
std::iota(vec_axises.begin(), vec_axises.end(), 0);
l3 = prog.add_instruction(op::unsqueeze{vec_axises}, l3);
}
auto l4 = args[2];
if(l4->get_shape().scalar()) // ignore args[2] (no C value added to alpha*A*B)
return l3;
......@@ -481,7 +508,16 @@ struct onnx_parser
return add_broadcastable_binary_op(l3, l4, op::add{});
}
}
return prog.add_instruction(op::dot{alpha, beta}, l1, l2);
auto dot_res = prog.add_instruction(op::dot{alpha, beta}, l1, l2);
if(num_squeeze > 2)
{
std::vector<int64_t> vec_axises(num_squeeze - 2);
std::iota(vec_axises.begin(), vec_axises.end(), 0);
dot_res = prog.add_instruction(op::unsqueeze{vec_axises}, dot_res);
}
return dot_res;
}
instruction_ref
......
......@@ -203,9 +203,8 @@ void memory_coloring_impl::rewrite()
if(is_allocate(ins))
{
assert(!ins->inputs().empty());
p_program->replace_instruction(
ins, op::load{ins->inputs().at(0)->get_shape(), offset}, scratch_param);
ins, op::load{ins->get_shape(), offset}, scratch_param);
}
else if(is_literal(ins))
{
......
option(MIGRAPHX_ENABLE_PYTHON "Enable python bindings" ON)
if(MIGRAPHX_ENABLE_PYTHON)
find_program(DEFAULT_PYTHON_EXE python)
if(DEFAULT_PYTHON_EXE)
set(PYTHON_EXECUTABLE ${DEFAULT_PYTHON_EXE} CACHE PATH "Path to python executable")
endif()
find_package(pybind11 REQUIRED)
pybind11_add_module(migraphx_py migraphx_py.cpp)
set_target_properties(migraphx_py PROPERTIES
OUTPUT_NAME migraphx
C_VISIBILITY_PRESET hidden
CXX_VISIBILITY_PRESET hidden
)
target_link_libraries(migraphx_py PRIVATE migraphx migraphx_onnx migraphx_cpu)
if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(migraphx_py PRIVATE migraphx_gpu)
target_compile_definitions(migraphx_py PRIVATE -DHAVE_GPU)
endif()
find_program(DEFAULT_PYTHON_EXE python)
if(DEFAULT_PYTHON_EXE)
set(PYTHON_EXECUTABLE ${DEFAULT_PYTHON_EXE} CACHE PATH "Path to python executable")
endif()
find_package(pybind11 REQUIRED)
pybind11_add_module(migraphx_py migraphx_py.cpp)
set_target_properties(migraphx_py PROPERTIES
OUTPUT_NAME migraphx
C_VISIBILITY_PRESET hidden
CXX_VISIBILITY_PRESET hidden
)
target_link_libraries(migraphx_py PRIVATE migraphx migraphx_onnx migraphx_cpu)
if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(migraphx_py PRIVATE migraphx_gpu)
target_compile_definitions(migraphx_py PRIVATE -DHAVE_GPU)
endif()
rocm_install_targets(TARGETS migraphx_py)
endif()
......@@ -28,6 +28,11 @@ struct throw_half
{
throw std::runtime_error("Half not supported in python yet.");
}
void operator()(migraphx::tensor_view<migraphx::half>) const
{
throw std::runtime_error("Half not supported in python yet.");
}
};
template <class F>
......@@ -42,6 +47,8 @@ struct skip_half
}
void operator()(migraphx::shape::as<migraphx::half>) const {}
void operator()(migraphx::tensor_view<migraphx::half>) const {}
};
template <class F>
......@@ -50,6 +57,12 @@ void visit_type(const migraphx::shape& s, F f)
s.visit_type(throw_half<F>{f});
}
template <class T, class F>
void visit(const migraphx::raw_data<T>& x, F f)
{
x.visit(throw_half<F>{f});
}
template <class F>
void visit_types(F f)
{
......@@ -60,6 +73,9 @@ template <class T>
py::buffer_info to_buffer_info(T& x)
{
migraphx::shape s = x.get_shape();
auto strides = s.strides();
std::transform(
strides.begin(), strides.end(), strides.begin(), [&](auto i) { return i * s.type_size(); });
py::buffer_info b;
visit_type(s, [&](auto as) {
b = py::buffer_info(x.data(),
......@@ -67,7 +83,7 @@ py::buffer_info to_buffer_info(T& x)
py::format_descriptor<decltype(as())>::format(),
s.lens().size(),
s.lens(),
s.strides());
strides);
});
return b;
}
......@@ -75,11 +91,20 @@ py::buffer_info to_buffer_info(T& x)
migraphx::shape to_shape(const py::buffer_info& info)
{
migraphx::shape::type_t t;
std::size_t n = 0;
visit_types([&](auto as) {
if(info.format == py::format_descriptor<decltype(as())>::format())
{
t = as.type_enum();
n = sizeof(as());
}
});
return migraphx::shape{t, info.shape, info.strides};
auto strides = info.strides;
std::transform(strides.begin(), strides.end(), strides.begin(), [&](auto i) -> std::size_t {
return n > 0 ? i / n : 0;
});
return migraphx::shape{t, info.shape, strides};
}
PYBIND11_MODULE(migraphx, m)
......@@ -108,6 +133,13 @@ PYBIND11_MODULE(migraphx, m)
py::buffer_info info = b.request();
new(&x) migraphx::argument(to_shape(info), info.ptr);
})
.def("get_shape", &migraphx::argument::get_shape)
.def("tolist",
[](migraphx::argument& x) {
py::list l{x.get_shape().elements()};
visit(x, [&](auto data) { l = py::cast(data.to_vector()); });
return l;
})
.def("__eq__", std::equal_to<migraphx::argument>{})
.def("__ne__", std::not_equal_to<migraphx::argument>{})
.def("__repr__", [](const migraphx::argument& x) { return migraphx::to_string(x); });
......
......@@ -16,22 +16,35 @@ argument gather(hipStream_t stream,
std::vector<migraphx::argument> args,
int axis)
{
int axis_index = (axis < 0) ? (axis + output_shape.lens().size()) : axis;
int axis_index = (axis < 0) ? (axis + args[0].get_shape().lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto* indices_ptr = device_cast(indices.data());
auto* outptr = device_cast(output.data());
const auto* inptr = device_cast(input.data());
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
gs_launch(stream, nelements)([=](auto i) {
auto lens = desc_output.multi(i);
lens[axis_index] = indices_ptr[lens[axis_index]];
outptr[i] = inptr[desc_input.linear(lens)];
const auto* indices_ptr = device_cast(indices.data());
auto* out_ptr = device_cast(output.data());
const auto* in_ptr = device_cast(input.data());
if(output_shape.scalar())
{
gs_launch(stream, 1)(
[=](auto i) { out_ptr[i] = in_ptr[static_cast<int>(indices_ptr[0])]; });
}
else
{
// if indices are a scalar, output has one dim smaller than input
auto& input_shape = args[0].get_shape();
auto lens = input_shape.lens();
lens[axis_index] = args[1].get_shape().elements();
migraphx::shape out_comp_shape{output_shape.type(), lens};
visit_tensor_size(out_comp_shape.lens().size(), [&](auto n_out_dim) {
hip_tensor_descriptor<n_out_dim> desc_input(input_shape);
hip_tensor_descriptor<n_out_dim> desc_output(out_comp_shape);
gs_launch(stream, nelements)([=](auto ii) {
auto in_idx = desc_output.multi(ii);
in_idx[axis_index] = indices_ptr[in_idx[axis_index]];
out_ptr[ii] = in_ptr[desc_input.linear(in_idx)];
});
});
});
}
});
});
......
......@@ -11,7 +11,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_NULL_STREAM)
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NULL_STREAM)
struct hip_device
{
......@@ -40,7 +40,7 @@ struct hip_device
hipStream_t get()
{
if(enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
if(not enabled(MIGRAPHX_ENABLE_NULL_STREAM{}))
{
setup();
if(s == nullptr)
......@@ -53,7 +53,7 @@ struct hip_device
auto create_miopen_handle()
{
if(enabled(MIGRAPHX_DISABLE_NULL_STREAM{}))
if(not enabled(MIGRAPHX_ENABLE_NULL_STREAM{}))
return make_obj<miopen_handle>(&miopenCreateWithStream, get());
else
return make_obj<miopen_handle>(&miopenCreate);
......
......@@ -23,12 +23,13 @@ void copy_to_gpu(const argument& src, const argument& dst);
struct hip_allocate
{
shape s;
std::string tag{};
std::string name() const { return "hip::allocate"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs}.has(1);
return inputs.front();
check_shapes{inputs}.has(0);
return s;
}
argument compute(context&, const shape& output_shape, const std::vector<argument>&) const
{
......
......@@ -127,8 +127,7 @@ struct miopen_apply
}
else
{
auto is = prog->add_outline(s);
auto result = prog->insert_instruction(ins, hip_allocate{std::move(tag)}, is);
auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
return result;
}
}
......
......@@ -37,8 +37,7 @@ void write_literals::apply(program& p) const
{
literal l = ins->get_literal();
auto pre = p.add_literal(l);
auto s = p.add_outline(l.get_shape());
auto alloc = p.insert_instruction(std::next(pre), hip_allocate{}, s);
auto alloc = p.insert_instruction(std::next(pre), hip_allocate{l.get_shape()});
p.replace_instruction(ins, hip_copy{}, pre, alloc);
}
else
......
......@@ -21,8 +21,8 @@ struct allocate
std::string name() const { return "allocate"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
{
migraphx::check_shapes{inputs, *this}.has(1);
return inputs.front();
migraphx::check_shapes{inputs, *this}.has(0);
return s;
}
migraphx::argument compute(migraphx::context&,
const migraphx::shape& output_shape,
......@@ -34,8 +34,7 @@ struct allocate
migraphx::instruction_ref add_alloc(migraphx::program& p, const migraphx::shape& s)
{
auto a0 = p.add_outline(s);
return p.add_instruction(allocate{}, a0);
return p.add_instruction(allocate{s});
}
bool no_allocate(const migraphx::program& p)
......
......@@ -235,7 +235,7 @@ TEST_CASE(gather)
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
migraphx::shape indices{migraphx::shape::int32_type, {2, 3}};
int axis = 1;
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 6, 4, 5}},
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 2, 3, 4, 5}},
migraphx::op::gather{axis},
input,
indices);
......@@ -245,7 +245,7 @@ TEST_CASE(gather)
migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
migraphx::shape indices{migraphx::shape::int32_type, {2, 3}};
int axis = -4;
expect_shape(migraphx::shape{migraphx::shape::float_type, {6, 3, 4, 5}},
expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 3, 4, 5}},
migraphx::op::gather{axis},
input,
indices);
......
......@@ -4,6 +4,8 @@ find_package(PythonInterp)
function(add_py_test NAME SCRIPT)
set (ENV_COMMAND ${CMAKE_COMMAND} -E env
"PYTHONPATH=$<TARGET_FILE_DIR:migraphx_py>"
"PYTHONMALLOC=debug"
"MALLOC_CHECK_=3"
)
add_test(
NAME test_py_${NAME}
......@@ -15,7 +17,8 @@ endfunction()
add_dependencies(tests migraphx_py)
add_dependencies(check migraphx_py)
add_py_test(cpu cpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
add_py_test(cpu test_cpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
if(MIGRAPHX_ENABLE_GPU)
add_py_test(gpu gpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
add_py_test(gpu test_gpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
add_py_test(array test_array.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
endif()
import migraphx, struct, array, sys
try:
from functools import reduce
except:
pass
def assert_eq(x, y):
if x == y:
pass
else:
raise Exception(str(x) + " != " + str(y))
def read_float(b, index):
return struct.unpack_from('f', b, index*4)[0]
def write_float(b, index):
struct.pack_into('f', b, index*4)
def nelements(lens):
return reduce(lambda x,y: x*y,lens, 1)
def create_buffer(t, data, shape):
a = array.array(t, data)
if sys.version_info >= (3, 0):
m = memoryview(a.tobytes())
return m.cast(t, shape)
else:
m = memoryview(a.tostring())
return m
def check_argument(a):
l = a.tolist()
for i in range(len(l)):
assert_eq(l[i], read_float(a, i))
def check_shapes(r, m):
lens = list(m.shape)
strides = [int(s/m.itemsize) for s in m.strides]
elements = nelements(lens)
assert_eq(r.get_shape().elements(), elements)
assert_eq(r.get_shape().lens(), lens)
assert_eq(r.get_shape().strides(), strides)
def run(p):
params = {}
for key, value in p.get_parameter_shapes().items():
params[key] = migraphx.to_gpu(migraphx.generate_argument(value))
return migraphx.from_gpu(p.run(params))
def test_shape(shape):
data = list(range(nelements(shape)))
m = create_buffer('f', data, shape)
a = migraphx.argument(m)
check_shapes(a, m)
assert_eq(a.tolist(), data)
def test_input():
if sys.version_info >= (3, 0):
test_shape([4])
test_shape([2, 3])
else:
data = list(range(4))
m = create_buffer('f', data, [4])
a1 = migraphx.argument(m)
a2 = migraphx.argument(bytearray(a1))
check_shapes(a2, m)
assert_eq(a1.tolist(), m.tolist())
def test_output():
p = migraphx.parse_onnx("conv_relu_maxpool.onnx")
p.compile(migraphx.get_target("gpu"))
r1 = run(p)
r2 = run(p)
assert_eq(r1, r2)
assert_eq(r1.tolist(), r2.tolist())
check_argument(r1)
check_argument(r2)
m1 = memoryview(r1)
m2 = memoryview(r2)
check_shapes(r1, m1)
check_shapes(r2, m2)
test_input()
test_output()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment