Commit 67c6e634 authored by Shucai Xiao's avatar Shucai Xiao
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into rnn_optimization

parents 0888f983 f93eeca3
...@@ -43,9 +43,7 @@ void eliminate_pad::update_op(T, ...@@ -43,9 +43,7 @@ void eliminate_pad::update_op(T,
std::vector<int64_t> pads = pad_op.pads; std::vector<int64_t> pads = pad_op.pads;
std::array<size_t, 2> new_pads{static_cast<size_t>(pads[2]), static_cast<size_t>(pads[3])}; std::array<size_t, 2> new_pads{static_cast<size_t>(pads[2]), static_cast<size_t>(pads[3])};
T op = any_cast<T>(ins->get_operator()); T op = any_cast<T>(ins->get_operator());
if(op.padding_mode != op::padding_mode_t::default_)
return;
op.padding = new_pads; op.padding = new_pads;
std::vector<instruction_ref> new_inputs{ins->inputs()}; std::vector<instruction_ref> new_inputs{ins->inputs()};
......
...@@ -44,51 +44,24 @@ struct convolution ...@@ -44,51 +44,24 @@ struct convolution
const shape& input = inputs.at(0); const shape& input = inputs.at(0);
const shape& weights = inputs.at(1); const shape& weights = inputs.at(1);
auto t = input.type(); auto t = input.type();
if(padding_mode == default_)
{ return {t,
return {t, {
{ input.lens()[0],
input.lens()[0], weights.lens()[0],
weights.lens()[0], std::size_t(std::max<std::ptrdiff_t>(
std::size_t(std::max<std::ptrdiff_t>( 1,
1, (input.lens()[2] - (1 + dilation[0] * (weights.lens()[2] - 1)) +
(input.lens()[2] - (1 + dilation[0] * (weights.lens()[2] - 1)) + 2 * padding[0]) /
2 * padding[0]) / stride[0] +
stride[0] + 1)),
1)), std::size_t(std::max<std::ptrdiff_t>(
std::size_t(std::max<std::ptrdiff_t>( 1,
1, (input.lens()[3] - (1 + dilation[1] * (weights.lens()[3] - 1)) +
(input.lens()[3] - (1 + dilation[1] * (weights.lens()[3] - 1)) + 2 * padding[1]) /
2 * padding[1]) / stride[1] +
stride[1] + 1)),
1)), }};
}};
}
else if(padding_mode == same)
{
return {t,
{input.lens()[0],
weights.lens()[0],
static_cast<std::size_t>(
std::ceil(static_cast<double>(input.lens()[2]) / stride[0])),
static_cast<std::size_t>(
std::ceil(static_cast<double>(input.lens()[3]) / stride[1]))}};
}
else if(padding_mode == valid)
{
return {
t,
{input.lens()[0],
weights.lens()[0],
static_cast<std::size_t>(std::ceil(
static_cast<double>(input.lens()[2] - weights.lens()[2] + 1) / stride[0])),
static_cast<std::size_t>(std::ceil(
static_cast<double>(input.lens()[3] - weights.lens()[3] + 1) / stride[1]))}};
}
else
{
MIGRAPHX_THROW("Invalid padding mode");
}
} }
}; };
......
...@@ -48,51 +48,21 @@ struct pooling ...@@ -48,51 +48,21 @@ struct pooling
assert(lengths[0] <= (input.lens()[2] + 2 * padding[0])); assert(lengths[0] <= (input.lens()[2] + 2 * padding[0]));
assert(lengths[1] <= (input.lens()[3] + 2 * padding[1])); assert(lengths[1] <= (input.lens()[3] + 2 * padding[1]));
if(padding_mode == default_) return {t,
{
return {t,
{
input.lens()[0],
input.lens()[1],
std::size_t(std::max<std::ptrdiff_t>(
1,
floor_divide<std::ptrdiff_t>(
input.lens()[2] + 2 * padding[0] - lengths[0], stride[0]) +
1)),
std::size_t(std::max<std::ptrdiff_t>(
1,
floor_divide<std::ptrdiff_t>(
input.lens()[3] + 2 * padding[1] - lengths[1], stride[1]) +
1)),
}};
}
else if(padding_mode == same)
{
return {t,
{input.lens()[0],
input.lens()[1],
ceil_divide<std::size_t>(input.lens()[2], stride[0]),
ceil_divide<std::size_t>(input.lens()[3], stride[1])}};
}
else if(padding_mode == valid)
{
return {
t,
{ {
input.lens()[0], input.lens()[0],
input.lens()[1], input.lens()[1],
std::size_t(std::max<std::ptrdiff_t>( std::size_t(std::max<std::ptrdiff_t>(
1, 1,
floor_divide<std::ptrdiff_t>(input.lens()[2] - lengths[0], stride[0]) + 1)), floor_divide<std::ptrdiff_t>(input.lens()[2] + 2 * padding[0] - lengths[0],
stride[0]) +
1)),
std::size_t(std::max<std::ptrdiff_t>( std::size_t(std::max<std::ptrdiff_t>(
1, 1,
floor_divide<std::ptrdiff_t>(input.lens()[3] - lengths[1], stride[1]) + 1)), floor_divide<std::ptrdiff_t>(input.lens()[3] + 2 * padding[1] - lengths[1],
stride[1]) +
1)),
}}; }};
}
else
{
MIGRAPHX_THROW("Invalid padding mode");
}
} }
}; };
......
...@@ -2,13 +2,24 @@ ...@@ -2,13 +2,24 @@
#define MIGRAPHX_GUARD_OPERATORS_PAD_CALC_HPP #define MIGRAPHX_GUARD_OPERATORS_PAD_CALC_HPP
#include <utility> #include <utility>
#include <cstdint>
#include <vector>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
inline std::size_t calculate_padding(std::size_t weight_dim, std::size_t dilation) inline void calculate_padding(int64_t idx,
std::vector<int64_t>& pads,
int64_t input_dim,
int64_t stride,
int64_t dilation,
int64_t weight_dim)
{ {
return (dilation * (weight_dim - 1)) / 2; int64_t output_dim = input_dim / stride;
int64_t pad = std::max(static_cast<int64_t>(0),
(output_dim - 1) * stride + dilation * weight_dim - input_dim);
pads[idx] = pad / 2;
pads[idx + 2] = pad - pad / 2;
} }
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -19,7 +19,7 @@ rocm_install_targets( ...@@ -19,7 +19,7 @@ rocm_install_targets(
add_executable(read_onnx read_onnx.cpp) add_executable(read_onnx read_onnx.cpp)
rocm_clang_tidy_check(read_onnx) rocm_clang_tidy_check(read_onnx)
target_link_libraries(read_onnx migraphx_onnx) target_link_libraries(read_onnx migraphx_cpu migraphx_onnx)
if(MIGRAPHX_ENABLE_GPU) if(MIGRAPHX_ENABLE_GPU)
......
...@@ -353,7 +353,8 @@ struct onnx_parser ...@@ -353,7 +353,8 @@ struct onnx_parser
{ {
// insert zeros for pad op (args[0] has 4 dims) // insert zeros for pad op (args[0] has 4 dims)
padding = {0, 0, padding[0], padding[1], 0, 0, padding[2], padding[3]}; padding = {0, 0, padding[0], padding[1], 0, 0, padding[2], padding[3]};
l0 = prog.add_instruction(op::pad{padding}, l0); l0 = prog.add_instruction(op::pad{padding, std::numeric_limits<float>::lowest()},
l0);
} }
else else
{ {
......
...@@ -64,9 +64,9 @@ host_type<T>* host_cast(T* x) ...@@ -64,9 +64,9 @@ host_type<T>* host_cast(T* x)
} }
template <class T> template <class T>
device_type<T> device_cast(T x) device_type<T> device_cast(const T& x)
{ {
return reinterpret_cast<device_type<T>>(x); return reinterpret_cast<const device_type<T>&>(x);
} }
template <class T> template <class T>
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <migraphx/gpu/device/pad.hpp> #include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp> #include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp> #include <migraphx/gpu/device/launch.hpp>
#include <migraphx/float_equal.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -14,8 +15,17 @@ argument ...@@ -14,8 +15,17 @@ argument
pad(hipStream_t stream, argument result, argument arg1, float value, std::vector<std::int64_t> pads) pad(hipStream_t stream, argument result, argument arg1, float value, std::vector<std::int64_t> pads)
{ {
std::size_t nelements = arg1.get_shape().elements(); std::size_t nelements = arg1.get_shape().elements();
visit_all(result)([&](auto output) {
auto* outptr = device_cast(output.data());
using type = typename decltype(output)::value_type;
device_type<type> device_val = value;
if(float_equal(value, std::numeric_limits<float>::lowest()))
{
device_val = device_cast(std::numeric_limits<type>::lowest());
}
gs_launch(stream, result.get_shape().elements())([=](auto i) { outptr[i] = device_val; });
});
nary(stream, result)([=] { return value; });
visit_all(result, arg1)([&](auto output, auto input) { visit_all(result, arg1)([&](auto output, auto input) {
visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) { visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) {
std::size_t offsets[ndim]; std::size_t offsets[ndim];
......
...@@ -34,7 +34,7 @@ struct miopen_softmax ...@@ -34,7 +34,7 @@ struct miopen_softmax
return migraphx::reflect(self.op, f); return migraphx::reflect(self.op, f);
} }
std::string name() const { return "gpu::softmax"; } std::string name() const { return "miopen::softmax"; }
shape compute_shape(const std::vector<shape>& inputs) const; shape compute_shape(const std::vector<shape>& inputs) const;
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
......
...@@ -31,7 +31,7 @@ rocm_install_targets( ...@@ -31,7 +31,7 @@ rocm_install_targets(
add_executable(read_tf read_tf.cpp) add_executable(read_tf read_tf.cpp)
rocm_clang_tidy_check(read_tf) rocm_clang_tidy_check(read_tf)
target_link_libraries(read_tf migraphx_tf) target_link_libraries(read_tf migraphx_tf migraphx_cpu)
if(MIGRAPHX_ENABLE_GPU) if(MIGRAPHX_ENABLE_GPU)
add_executable(verify_tf verify_tf.cpp) add_executable(verify_tf verify_tf.cpp)
......
...@@ -317,6 +317,7 @@ struct tf_parser ...@@ -317,6 +317,7 @@ struct tf_parser
} }
} }
auto l0 = args[0];
if(contains(attributes, "padding")) if(contains(attributes, "padding"))
{ {
const std::string& pad_mode = attributes.at("padding").s(); const std::string& pad_mode = attributes.at("padding").s();
...@@ -326,8 +327,24 @@ struct tf_parser ...@@ -326,8 +327,24 @@ struct tf_parser
std::vector<size_t> weight_dims = weights->get_shape().lens(); std::vector<size_t> weight_dims = weights->get_shape().lens();
size_t weight_h = weight_dims[2]; size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3]; size_t weight_w = weight_dims[3];
op.padding[0] = calculate_padding(weight_h, op.dilation[0]);
op.padding[1] = calculate_padding(weight_w, op.dilation[1]); auto input_dims = l0->get_shape().lens();
size_t input_h = input_dims[2];
size_t input_w = input_dims[3];
std::vector<int64_t> pads(input_dims.size());
calculate_padding(0, pads, input_h, op.stride[0], op.dilation[0], weight_h);
calculate_padding(1, pads, input_w, op.stride[1], op.dilation[1], weight_w);
if(pads[0] != pads[2] || pads[1] != pads[3])
{
std::vector<int64_t> padding = {0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]};
l0 = prog.add_instruction(migraphx::op::pad{padding}, l0);
}
else
{
op.padding[0] = pads[0];
op.padding[1] = pads[1];
}
} }
else if(pad_mode.find("VALID") != std::string::npos) else if(pad_mode.find("VALID") != std::string::npos)
{ {
...@@ -350,7 +367,7 @@ struct tf_parser ...@@ -350,7 +367,7 @@ struct tf_parser
} }
} }
return prog.add_instruction(op, {args[0], weights}); return prog.add_instruction(op, {l0, weights});
} }
instruction_ref parse_depthwiseconv(const std::string&, instruction_ref parse_depthwiseconv(const std::string&,
...@@ -400,17 +417,35 @@ struct tf_parser ...@@ -400,17 +417,35 @@ struct tf_parser
} }
} }
auto l0 = args[0];
if(contains(attributes, "padding")) if(contains(attributes, "padding"))
{ {
const std::string& pad_mode = attributes.at("padding").s(); const std::string& pad_mode = attributes.at("padding").s();
std::vector<size_t> weight_dims = weights->get_shape().lens();
size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3];
if(pad_mode.find("SAME") != std::string::npos) if(pad_mode.find("SAME") != std::string::npos)
{ {
op.padding_mode = op::padding_mode_t::same; op.padding_mode = op::padding_mode_t::same;
op.padding[0] = calculate_padding(weight_h, op.dilation[0]); std::vector<size_t> weight_dims = weights->get_shape().lens();
op.padding[1] = calculate_padding(weight_w, op.dilation[1]); size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3];
auto input_dims = l0->get_shape().lens();
size_t input_h = input_dims[2];
size_t input_w = input_dims[3];
std::vector<int64_t> pads(input_dims.size());
calculate_padding(0, pads, input_h, op.stride[0], op.dilation[0], weight_h);
calculate_padding(1, pads, input_w, op.stride[1], op.dilation[1], weight_w);
if(pads[0] != pads[2] || pads[1] != pads[3])
{
std::vector<int64_t> padding = {0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]};
l0 = prog.add_instruction(migraphx::op::pad{padding}, l0);
}
else
{
op.padding[0] = pads[0];
op.padding[1] = pads[1];
}
} }
else if(pad_mode.find("VALID") != std::string::npos) else if(pad_mode.find("VALID") != std::string::npos)
{ {
...@@ -432,7 +467,7 @@ struct tf_parser ...@@ -432,7 +467,7 @@ struct tf_parser
auto cweights = prog.add_instruction(op::contiguous{}, weights); auto cweights = prog.add_instruction(op::contiguous{}, weights);
auto new_weights = prog.add_instruction(op::reshape{new_weights_shape}, cweights); auto new_weights = prog.add_instruction(op::reshape{new_weights_shape}, cweights);
return prog.add_instruction(op, {args[0], new_weights}); return prog.add_instruction(op, {l0, new_weights});
} }
instruction_ref instruction_ref
...@@ -567,21 +602,39 @@ struct tf_parser ...@@ -567,21 +602,39 @@ struct tf_parser
op.lengths[0] = ksize[2]; op.lengths[0] = ksize[2];
op.lengths[1] = ksize[3]; op.lengths[1] = ksize[3];
} }
auto l0 = args[0];
if(contains(attributes, "padding")) if(contains(attributes, "padding"))
{ {
const std::string& pad_mode = attributes.at("padding").s(); const std::string& pad_mode = attributes.at("padding").s();
if(pad_mode.find("SAME") != std::string::npos) if(pad_mode.find("SAME") != std::string::npos)
{ {
op.padding_mode = op::padding_mode_t::same; op.padding_mode = op::padding_mode_t::same;
op.padding[0] = calculate_padding(op.lengths[0], 1); auto input_dims = l0->get_shape().lens();
op.padding[1] = calculate_padding(op.lengths[1], 1); size_t input_h = input_dims[2];
size_t input_w = input_dims[3];
std::vector<int64_t> pads(input_dims.size());
calculate_padding(0, pads, input_h, op.stride[0], 1, op.lengths[0]);
calculate_padding(1, pads, input_w, op.stride[1], 1, op.lengths[1]);
if(pads[0] != pads[2] || pads[1] != pads[3])
{
std::vector<int64_t> padding = {0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]};
l0 = prog.add_instruction(
migraphx::op::pad{padding, std::numeric_limits<float>::lowest()}, l0);
}
else
{
op.padding[0] = pads[0];
op.padding[1] = pads[1];
}
} }
else if(pad_mode.find("VALID") != std::string::npos) else if(pad_mode.find("VALID") != std::string::npos)
{ {
op.padding_mode = op::padding_mode_t::valid; op.padding_mode = op::padding_mode_t::valid;
} }
} }
return prog.add_instruction(op, args[0]); return prog.add_instruction(op, l0);
} }
instruction_ref instruction_ref
......
...@@ -119,7 +119,7 @@ foreach(ONNX_TEST ${ONNX_TESTS}) ...@@ -119,7 +119,7 @@ foreach(ONNX_TEST ${ONNX_TESTS})
set(TEST_NAME test_${BASE_NAME}) set(TEST_NAME test_${BASE_NAME})
add_executable(${TEST_NAME} ${TES_ONNX_DIR}/${ONNX_TEST}) add_executable(${TEST_NAME} ${TES_ONNX_DIR}/${ONNX_TEST})
rocm_clang_tidy_check(${TEST_NAME}) rocm_clang_tidy_check(${TEST_NAME})
target_link_libraries(${TEST_NAME} migraphx_onnx) target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_cpu)
target_include_directories(${TEST_NAME} PUBLIC include) target_include_directories(${TEST_NAME} PUBLIC include)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/onnx) add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
add_dependencies(tests ${TEST_NAME}) add_dependencies(tests ${TEST_NAME})
...@@ -129,7 +129,7 @@ endforeach() ...@@ -129,7 +129,7 @@ endforeach()
# tf test # tf test
add_executable(test_tf tf/tf_test.cpp) add_executable(test_tf tf/tf_test.cpp)
rocm_clang_tidy_check(test_tf) rocm_clang_tidy_check(test_tf)
target_link_libraries(test_tf migraphx_tf) target_link_libraries(test_tf migraphx_tf migraphx_cpu)
target_include_directories(test_tf PUBLIC include) target_include_directories(test_tf PUBLIC include)
add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tf) add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tf)
add_dependencies(tests test_tf) add_dependencies(tests test_tf)
......
...@@ -83,23 +83,4 @@ TEST_CASE(rewrite_test_asymmetric) ...@@ -83,23 +83,4 @@ TEST_CASE(rewrite_test_asymmetric)
p.begin(), p.end(), [](const migraphx::instruction& ins) { return ins.name() == "pad"; })); p.begin(), p.end(), [](const migraphx::instruction& ins) { return ins.name() == "pad"; }));
} }
TEST_CASE(rewrite_test_same_padding)
{
migraphx::program p;
size_t img_dim[2] = {2, 2};
size_t channels = 1;
std::vector<int32_t> input(channels * img_dim[0] * img_dim[1]);
std::iota(input.begin(), input.end(), 0);
migraphx::shape s_img{migraphx::shape::int32_type, {1, channels, img_dim[0], img_dim[1]}};
auto l_img = p.add_literal(migraphx::literal{s_img, input});
auto padded_img = p.add_instruction(migraphx::op::pad{{0, 0, 1, 1, 0, 0, 1, 1}}, l_img);
create_conv(padded_img, channels, p, migraphx::op::padding_mode_t::same);
p.compile(eliminate_pad_target{});
EXPECT(std::any_of(
p.begin(), p.end(), [](const migraphx::instruction& ins) { return ins.name() == "pad"; }));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); } int main(int argc, const char* argv[]) { test::run(argc, argv); }
...@@ -1460,6 +1460,22 @@ struct test_pad : verify_program<test_pad> ...@@ -1460,6 +1460,22 @@ struct test_pad : verify_program<test_pad>
} }
}; };
struct test_pad_int8 : verify_program<test_pad_int8>
{
migraphx::program create_program() const
{
migraphx::program p;
std::vector<int8_t> data0 = {0, 1, 2, 3};
migraphx::shape s0{migraphx::shape::float_type, {2, 2}};
auto l0 = p.add_literal(migraphx::literal{s0, data0});
migraphx::op::pad op{};
op.value = std::numeric_limits<int8_t>::lowest();
op.pads = {0, 0, 1, 1};
p.add_instruction(op, l0);
return p;
}
};
struct test_pooling_autopad : verify_program<test_pooling_autopad> struct test_pooling_autopad : verify_program<test_pooling_autopad>
{ {
migraphx::program create_program() const migraphx::program create_program() const
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment