Commit c321dd0c authored by Shucai Xiao

Merge branch 'int8_miopen_call' into op_capture

parents 36ab4520 e0e28ef7
......@@ -125,6 +125,8 @@ rocm_enable_cppcheck(
functionConst:*program.*
shadowFunction
shadowVar
shadowVariable
unsafeClassDivZero
definePrefix:*test/include/test.hpp
FORCE
INCONCLUSIVE
......
......@@ -99,13 +99,14 @@ rocmtest tidy: rocmnode('rocmtest') { cmake_build ->
| xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-5.0 -style=file {} | diff - {}\'
'''
}
}, clang: rocmnode('vega') { cmake_build ->
}, clang_debug: rocmnode('vega') { cmake_build ->
stage('Clang Debug') {
// TODO: Enanle integer
// TODO: Enable integer
def sanitizers = "undefined"
def debug_flags = "-g -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build("hcc", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'")
}
}, clang_release: rocmnode('vega') { cmake_build ->
stage('Clang Release') {
cmake_build("hcc", "-DCMAKE_BUILD_TYPE=release")
}
......
......@@ -353,14 +353,14 @@ MIGRAPHX_PRED_MATCHER(same_input_shapes, instruction_ref ins)
ins->inputs().begin(), ins->inputs().end(), [&](auto x) { return x->get_shape() == s; });
}
MIGRAPHX_BASIC_MATCHER(output, matcher_context& ctx, instruction_ref ins)
MIGRAPHX_BASIC_MATCHER(output, const matcher_context& ctx, instruction_ref ins)
{
if(ins->outputs().size() == 1)
return ins->outputs().front();
return ctx.not_found();
}
MIGRAPHX_BASIC_MATCHER(used_once, matcher_context& ctx, instruction_ref ins)
MIGRAPHX_BASIC_MATCHER(used_once, const matcher_context& ctx, instruction_ref ins)
{
if(ins->outputs().size() == 1)
return ins;
......@@ -411,7 +411,7 @@ inline auto nargs(std::size_t n)
inline auto arg(std::size_t i)
{
return make_basic_fun_matcher([=](matcher_context& ctx, instruction_ref ins) {
return make_basic_fun_matcher([=](const matcher_context& ctx, instruction_ref ins) {
if(i < ins->inputs().size())
return ins->inputs()[i];
return ctx.not_found();
......
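For context, a matcher written against the const-qualified signature introduced above only reads from the context and returns either the matched instruction or ctx.not_found(). The sketch below follows that pattern; has_no_inputs is a hypothetical example, not a matcher added by this commit.

// Hypothetical matcher using the const-correct signature shown above:
// succeed (return the instruction) when it has no inputs, otherwise
// report "no match" through the context.
MIGRAPHX_BASIC_MATCHER(has_no_inputs, const matcher_context& ctx, instruction_ref ins)
{
    if(ins->inputs().empty())
        return ins;
    return ctx.not_found();
}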
......@@ -168,6 +168,7 @@ bool verify_range(R1&& r1, R2&& r2, double tolerance = 80, double* out_error = n
{
double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance;
auto error = rms_range(r1, r2);
// cppcheck-suppress uninitvar
if(out_error != nullptr)
*out_error = error;
return error <= threshold;
......
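As a rough, standalone sketch of the check above (not the library's rms_range implementation): the threshold scales the element type's machine epsilon by the tolerance factor and is compared against the root-mean-square error of the two ranges. Names and the exact error formula below are assumptions; only the epsilon * tolerance threshold comes from the code above.

#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// Illustrative stand-in for rms_range; assumes r1 and r2 have the same length.
inline double rms_error(const std::vector<double>& r1, const std::vector<double>& r2)
{
    double sum = 0.0;
    for(std::size_t i = 0; i < r1.size(); i++)
        sum += (r1[i] - r2[i]) * (r1[i] - r2[i]);
    return std::sqrt(sum / r1.size());
}

// Illustrative stand-in for verify_range: scale epsilon by the tolerance and
// accept the ranges when the RMS error stays below that threshold.
inline bool verify_tolerance(const std::vector<double>& r1,
                             const std::vector<double>& r2,
                             double tolerance = 80)
{
    double threshold = std::numeric_limits<double>::epsilon() * tolerance;
    return rms_error(r1, r2) <= threshold;
}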
......@@ -1011,9 +1011,10 @@ struct onnx_parser
}
std::vector<operation> vec_actv_funcs(vec_names.size());
std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& fn) {
return map_actv_funcs[fn];
});
std::transform(vec_names.begin(),
vec_names.end(),
vec_actv_funcs.begin(),
[&](const auto& fn) { return map_actv_funcs[fn]; });
// To be added later
float clip = 0.0;
......@@ -1127,9 +1128,10 @@ struct onnx_parser
}
std::vector<operation> vec_actv_funcs(vec_names.size());
std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& name) {
return map_actv_funcs[name];
});
std::transform(vec_names.begin(),
vec_names.end(),
vec_actv_funcs.begin(),
[&](const auto& name) { return map_actv_funcs[name]; });
float clip = 0.0;
if(contains(attributes, "clip"))
......@@ -1299,9 +1301,10 @@ struct onnx_parser
}
std::vector<operation> vec_actv_funcs(vec_names.size());
std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(), [&](auto& name) {
return map_actv_funcs[name];
});
std::transform(vec_names.begin(),
vec_names.end(),
vec_actv_funcs.begin(),
[&](const auto& name) { return map_actv_funcs[name]; });
float clip = 0.0;
if(contains(attributes, "clip"))
......
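The three hunks above are the same mechanical reflow: the lambda now takes its element by const reference and the std::transform call is spread across lines. A self-contained illustration of the lookup pattern, with placeholder map contents (the real parser maps activation names to migraphx operations):

#include <algorithm>
#include <map>
#include <string>
#include <vector>

int main()
{
    // Placeholder contents for illustration only.
    std::map<std::string, int> map_actv_funcs = {{"sigmoid", 0}, {"tanh", 1}, {"relu", 2}};
    std::vector<std::string> vec_names        = {"sigmoid", "tanh"};

    // Pre-sized output, filled by looking each name up in the map.
    std::vector<int> vec_actv_funcs(vec_names.size());
    std::transform(vec_names.begin(),
                   vec_names.end(),
                   vec_actv_funcs.begin(),
                   [&](const auto& name) { return map_actv_funcs[name]; });
    return 0;
}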
......@@ -107,7 +107,7 @@ struct memory_coloring_impl
return ins->name() == "check_context";
}
static bool is_disjoin(live_range& range1, live_range& range2)
static bool is_disjoin(const live_range& range1, const live_range& range2)
{
if((range1.size == 0) || (range2.size == 0))
return false;
......
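Only the signature and the empty-range guard appear in this hunk. As a rough sketch of an interval-disjointness test of this kind (field names other than size are assumptions, not from the diff):

// Hypothetical layout of a live range, for illustration only; only `size`
// is visible in the hunk above.
struct live_range_sketch
{
    long begin = 0;
    long end   = 0;
    long size  = 0;
};

// Two ranges are disjoint when one ends before the other begins; empty
// ranges are reported as not disjoint, matching the early return above.
inline bool is_disjoint_sketch(const live_range_sketch& r1, const live_range_sketch& r2)
{
    if((r1.size == 0) || (r2.size == 0))
        return false;
    return (r1.end < r2.begin) || (r2.end < r1.begin);
}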
......@@ -242,7 +242,7 @@ instruction_ref program::remove_instructions(instruction_ref first, instruction_
// TODO: Check every element
assert(has_instruction(first));
std::for_each(first, last, [&](instruction& ins) { ins.clear_arguments(); });
assert(std::all_of(first, last, [&](instruction& ins) { return ins.outputs().empty(); }));
assert(std::all_of(first, last, [&](const instruction& ins) { return ins.outputs().empty(); }));
return impl->instructions.erase(first, last);
}
......
......@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
std::vector<float> ihc_data(ihc_shape.elements(), 0.0);
migraphx::shape pph_shape{type, {1, 3 * hidden_size}};
std::vector<float> pph_data(pph_shape.elements(), 0.0);
auto actv_funcs = lstm_actv_funcs(ins);
auto lstm_op = any_cast<op::lstm>(ins->get_operator());
......
......@@ -302,17 +302,17 @@ struct cpu_im2col
const std::size_t& stride_h = op.stride[0];
const std::size_t& stride_w = op.stride[1];
auto kdiv2_h = kernel_h / 2;
auto kdiv2_w = kernel_w / 2;
long kdiv2_h = long(kernel_h) / 2;
long kdiv2_w = long(kernel_w) / 2;
// calculate output sizes
const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
// account for padding for the starting position of the input pixels
std::size_t iinput = kdiv2_h - pad_h;
long iinput = kdiv2_h - long(pad_h);
// loop over output pixels (ioutput, joutput)
for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
{
std::size_t jinput = kdiv2_w - pad_w;
long jinput = kdiv2_w - long(pad_w);
for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
{
// compute linear index for output
......@@ -321,8 +321,8 @@ struct cpu_im2col
dfor(channels,
kernel_h,
kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
auto idx = iinput + koffset - kdiv2_h;
auto jdx = jinput + loffset - kdiv2_w;
auto idx = iinput + long(koffset) - kdiv2_h;
auto jdx = jinput + long(loffset) - kdiv2_w;
col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
? input(0, c, idx, jdx)
: 0;
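The switch from std::size_t to long in the im2col hunks above matters because the padded start offsets can be negative; with unsigned arithmetic they wrap around instead of going below zero, and the bounds checks (idx >= 0, idx < height) can no longer reject them. A minimal demonstration with illustrative values:

#include <cstddef>
#include <iostream>

int main()
{
    std::size_t kernel_h = 3;
    std::size_t pad_h    = 2;

    // Unsigned subtraction wraps: 1 - 2 becomes a huge positive index.
    std::size_t wrapped = kernel_h / 2 - pad_h;

    // Doing the arithmetic in a signed type keeps the intended -1, which the
    // (idx >= 0) bound check can then reject correctly.
    long signed_offset = long(kernel_h) / 2 - long(pad_h);

    std::cout << wrapped << " vs " << signed_offset << "\n";
    return 0;
}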
......@@ -589,7 +589,7 @@ struct leaky_relu_op
std::string name() const { return "cpu::leaky_relu"; }
auto fcn() const
{
auto& a = op.alpha;
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : x * a; };
}
};
......@@ -600,7 +600,7 @@ struct elu_op
std::string name() const { return "cpu::elu"; }
auto fcn() const
{
auto& a = op.alpha;
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
}
};
......
......@@ -24,9 +24,11 @@ add_library(migraphx_device
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/tanh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/relu.cpp
device/add_relu.cpp
device/contiguous.cpp
device/logsoftmax.cpp
......@@ -71,9 +73,7 @@ add_library(migraphx_gpu
logsoftmax.cpp
contiguous.cpp
concat.cpp
relu.cpp
leaky_relu.cpp
tanh.cpp
batchnorm.cpp
write_literals.cpp
rocblas.cpp
......
......@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
__device__ inline void dpp_reduce(float& x, sum)
{
#ifdef MIGRAPHX_USE_CLANG_TIDY
(void)x;
#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK)
x = 1;
#else
__asm__ volatile("s_nop 4\n"
"v_add_f32 %0 %0 %0 row_shr:1\n"
......
#include <migraphx/gpu/device/relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void relu(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return std::max<decltype(x)>(0, x); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/tanh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tanh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::tanh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
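Both new device sources follow the same one-line nary() pattern, so adding another pointwise kernel would look much the same. The sketch below is hypothetical: relu6 is not part of this commit, and the relu6.hpp header it assumes would have to be added alongside it, mirroring the relu/tanh headers that follow.

// Hypothetical pointwise kernel in the style of relu/tanh above.
#include <migraphx/gpu/device/relu6.hpp> // assumed header, declared like relu.hpp below
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void relu6(hipStream_t stream, const argument& result, const argument& arg)
{
    // Clamp to [0, 6] elementwise, reusing the std::max pattern from relu.
    nary(stream, result, arg)(
        [](auto x) { return std::min<decltype(x)>(std::max<decltype(x)>(0, x), 6); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx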
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_RELU_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void relu(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TANH_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void tanh(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_RELU_HPP
#define MIGRAPHX_GUARD_RTGLIB_RELU_HPP
#include <migraphx/shape.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/relu.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -10,24 +10,8 @@ namespace gpu {
struct context;
struct miopen_relu
struct hip_relu : unary_device<hip_relu, device::relu>
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::relu"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#define MIGRAPHX_GUARD_RTGLIB_TANH_HPP
#include <migraphx/shape.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/tanh.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct miopen_tanh
struct hip_tanh : unary_device<hip_tanh, device::tanh>
{
shared<activation_descriptor> ad;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return gpu::reflect(self.ad.get(), f);
}
std::string name() const { return "gpu::tanh"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
......
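Both activation headers shrink to the same one-liner: the MIOpen descriptor reflection, name(), compute_shape(), compute(), and output_alias() boilerplate is now supplied by the unary_device base. Porting a further elementwise operator would follow the same shape; the declaration below is hypothetical and assumes a matching device::relu6 kernel like the one sketched earlier.

// Hypothetical: not part of this commit; mirrors hip_relu/hip_tanh above.
struct hip_relu6 : unary_device<hip_relu6, device::relu6>
{
};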
......@@ -86,10 +86,8 @@ struct miopen_apply
void init()
{
this->last = instruction::get_output_alias(std::prev(prog->end()));
add_miopen_simple_op<miopen_relu>("relu", make_relu);
add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
add_miopen_simple_op<miopen_abs>("abs", make_abs);
add_miopen_simple_op<miopen_tanh>("tanh", make_tanh);
add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);
......@@ -104,6 +102,7 @@ struct miopen_apply
add_generic_op<hip_tan>("tan");
add_generic_op<hip_sinh>("sinh");
add_generic_op<hip_cosh>("cosh");
add_generic_op<hip_tanh>("tanh");
add_generic_op<hip_asin>("asin");
add_generic_op<hip_acos>("acos");
add_generic_op<hip_atan>("atan");
......@@ -115,6 +114,7 @@ struct miopen_apply
add_generic_op<hip_rsqrt>("rsqrt");
add_generic_op<hip_pow>("pow");
add_generic_op<hip_sqdiff>("sqdiff");
add_generic_op<hip_relu>("relu");
add_generic_op<hip_sign>("sign");
add_extend_op<miopen_gemm, op::dot>("dot");
......
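With the device-side kernels in place, lowering only needs the generic registration shown above instead of the MIOpen-specific path. A hypothetical new operator would be wired up the same way inside miopen_apply::init(); hip_relu6 and the "relu6" name are illustrative only.

// Hypothetical registration, following add_generic_op<hip_tanh>("tanh") and
// add_generic_op<hip_relu>("relu") above.
add_generic_op<hip_relu6>("relu6");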
#include <migraphx/gpu/relu.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_relu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_relu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx