Unverified commit 6bc31506, authored by Paul Fultz II, committed by GitHub

Merge branch 'develop' into multiply-add

parents 3043afe5 7534546a
@@ -28,10 +28,32 @@ inline namespace MIGRAPHX_INLINE_NS {
#define MIGRAPHX_DRIVER_STATIC static
#endif
template <class T>
using bare = std::remove_cv_t<std::remove_reference_t<T>>;
namespace detail {
template <class T>
auto is_container(int, T&& x) -> decltype(x.insert(x.end(), *x.begin()), std::true_type{});
template <class T>
std::false_type is_container(float, T&&);
} // namespace detail
template <class T>
struct is_container : decltype(detail::is_container(int(0), std::declval<T>()))
{
};
template <class T>
using is_multi_value =
std::integral_constant<bool, (is_container<T>{} and not std::is_convertible<T, std::string>{})>;
template <class T>
struct value_parser
{
-template <MIGRAPHX_REQUIRES(not std::is_enum<T>{})>
+template <MIGRAPHX_REQUIRES(not std::is_enum<T>{} and not is_multi_value<T>{})>
static T apply(const std::string& x)
{
T result;
@@ -43,7 +65,7 @@ struct value_parser
return result;
}
-template <MIGRAPHX_REQUIRES(std::is_enum<T>{})>
+template <MIGRAPHX_REQUIRES(std::is_enum<T>{} and not is_multi_value<T>{})>
static T apply(const std::string& x)
{
std::ptrdiff_t i;
@@ -54,6 +76,15 @@ struct value_parser
throw std::runtime_error("Failed to parse: " + x);
return static_cast<T>(i);
}
template <MIGRAPHX_REQUIRES(is_multi_value<T>{} and not std::is_enum<T>{})>
static T apply(const std::string& x)
{
T result;
using value_type = typename T::value_type;
result.insert(result.end(), value_parser<value_type>::apply(x));
return result;
}
};
struct argument_parser
@@ -69,6 +100,18 @@ struct argument_parser
unsigned nargs = 1;
};
template <class T, MIGRAPHX_REQUIRES(is_multi_value<T>{})>
std::string as_string_value(const T& x)
{
return to_string_range(x);
}
template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})>
std::string as_string_value(const T& x)
{
return to_string(x);
}
template <class T, class... Fs>
void operator()(T& x, const std::vector<std::string>& flags, Fs... fs)
{
@@ -81,7 +124,7 @@ struct argument_parser
argument& arg = arguments.back();
arg.type = migraphx::get_type_name<T>();
-arg.default_value = to_string(x);
+arg.default_value = as_string_value(x);
migraphx::each_args([&](auto f) { f(x, arg); }, fs...);
}
@@ -127,7 +170,7 @@ struct argument_parser
MIGRAPHX_DRIVER_STATIC auto append()
{
return write_action([](auto&, auto& x, auto& params) {
-using type = typename decltype(params)::value_type;
+using type = typename bare<decltype(params)>::value_type;
std::transform(params.begin(),
params.end(),
std::inserter(x, x.end()),
......
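Note on the detection idiom above: the int overload of detail::is_container is preferred, but it participates in overload resolution only when x.insert(x.end(), *x.begin()) is well-formed; otherwise the float overload, one implicit conversion away, is chosen and yields std::false_type. Because std::string also supports that insert expression, is_multi_value additionally rejects anything convertible to std::string. A self-contained sketch of the mechanics (the static_asserts are illustrative, not part of the patch):

#include <string>
#include <type_traits>
#include <utility>
#include <vector>

namespace detail {
// Preferred overload: viable only when the insert expression compiles.
template <class T>
auto is_container(int, T&& x) -> decltype(x.insert(x.end(), *x.begin()), std::true_type{});
// Fallback: the int -> float conversion makes this the worse match.
template <class T>
std::false_type is_container(float, T&&);
} // namespace detail

template <class T>
struct is_container : decltype(detail::is_container(int(0), std::declval<T>()))
{
};

static_assert(is_container<std::vector<int>>{}, "vector inserts");
static_assert(not is_container<int>{}, "int does not");
// std::string matches too, hence the extra is_multi_value guard.
static_assert(is_container<std::string>{}, "string would also match");

int main() {}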
@@ -8,6 +8,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_identity.hpp>
#include <migraphx/eliminate_pad.hpp>
@@ -80,11 +81,13 @@ struct compiler
{
loader l;
bool gpu = true;
std::vector<std::string> fill1;
void parse(argument_parser& ap)
{
l.parse(ap);
ap(gpu, {"--gpu"}, ap.help("Compile on the gpu"), ap.set_value(true));
ap(gpu, {"--cpu"}, ap.help("Compile on the cpu"), ap.set_value(false));
ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append());
}
program compile()
@@ -94,7 +97,14 @@ struct compiler
return p;
}
-auto params(const program& p) { return create_param_map(p, gpu); }
+auto params(const program& p)
+{
+program::parameter_map m;
+for(auto&& s : fill1)
+m[s] = fill_argument(p.get_parameter_shape(s), 1);
+fill_param_map(m, p, gpu);
+return m;
+}
};
struct read : command<read>
@@ -109,6 +119,19 @@ struct read : command<read>
}
};
struct params : command<params>
{
loader l;
void parse(argument_parser& ap) { l.parse(ap); }
void run()
{
auto p = l.load();
for(auto&& param : p.get_parameter_shapes())
std::cout << param.first << ": " << param.second << std::endl;
}
};
struct verify : command<verify>
{
loader l;
......
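Note on the driver additions above: the new params subcommand loads a model and prints each parameter's name and shape, and the repeatable --fill1 flag (it uses append(), so values accumulate into the fill1 vector) makes the compiler fill the named parameter with ones instead of generated data; parameters not named still go through fill_param_map. A hedged usage sketch, assuming the usual migraphx-driver binary and a model file of your own: migraphx-driver params model.onnx lists the parameters, and --fill1 <name> can then be passed to the compiling subcommands.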
@@ -11,6 +11,23 @@ namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {
program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu)
{
for(auto&& x : p.get_parameter_shapes())
{
argument& arg = m[x.first];
if(arg.empty())
arg = generate_argument(x.second);
#ifdef HAVE_GPU
if(gpu)
arg = gpu::to_gpu(arg);
#else
(void)gpu;
#endif
}
return m;
}
program::parameter_map create_param_map(const program& p, bool gpu)
{
program::parameter_map m;
......
@@ -7,6 +7,7 @@ namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {
program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu);
program::parameter_map create_param_map(const program& p, bool gpu = true);
void compile_program(program& p, bool gpu = true);
......
@@ -3,6 +3,17 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
argument fill_argument(shape s, unsigned long value)
{
argument result;
s.visit_type([&](auto as) {
using type = typename decltype(as)::type;
auto v = fill_tensor_data<type>(s, value);
result = {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
});
return result;
}
argument generate_argument(shape s, unsigned long seed)
{
argument result;
......
@@ -87,6 +87,16 @@ std::vector<T> generate_tensor_data(const migraphx::shape& s, unsigned long seed
return result;
}
template <class T>
std::vector<T> fill_tensor_data(const migraphx::shape& s, unsigned long value = 0)
{
std::vector<T> result(s.elements());
std::generate(result.begin(), result.end(), [=] { return value; });
return result;
}
argument fill_argument(shape s, unsigned long value = 0);
argument generate_argument(shape s, unsigned long seed = 0);
literal generate_literal(shape s, unsigned long seed = 0);
......
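The new fill_tensor_data / fill_argument helpers build a tensor whose every element equals value; the driver's --fill1 flag uses them with value = 1. A minimal usage sketch, assuming a program built against MIGraphX (the shape here is illustrative):

#include <migraphx/argument.hpp>
#include <migraphx/generate.hpp>
#include <iostream>

int main()
{
    // A 2x3 float tensor with every element set to 1.
    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
    migraphx::argument ones = migraphx::fill_argument(s, 1);
    std::cout << ones << std::endl; // prints six 1s
}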
@@ -23,9 +23,10 @@ using bool_c = std::integral_constant<bool, B>;
#ifdef CPPCHECK
#define MIGRAPHX_REQUIRES(...) class = void
#else
-#define MIGRAPHX_REQUIRES(...) \
-bool MIGRAPHX_REQUIRES_VAR() = true, \
-typename std::enable_if<(MIGRAPHX_REQUIRES_VAR() && (migraphx::and_<__VA_ARGS__>{})), \
+#define MIGRAPHX_REQUIRES(...) \
+long MIGRAPHX_REQUIRES_VAR() = __LINE__, \
+typename std::enable_if<(MIGRAPHX_REQUIRES_VAR() == __LINE__ && \
+(migraphx::and_<__VA_ARGS__>{})), \
int>::type = 0
#endif
......
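On the MIGRAPHX_REQUIRES change: replacing bool ... = true with long ... = __LINE__ embeds the declaration's line number in the enable_if expression, so every use of the macro yields a textually distinct constraint, and overloads declared on different lines can no longer collide as redefinitions even when their conditions are otherwise identical. A standalone sketch of the mechanics (a simplified stand-in macro, not the MIGraphX one):

#include <cstdio>
#include <type_traits>

// Each REQUIRES use bakes its own line number into the constraint, making
// the two kind() declarations below distinct templates.
#define REQUIRES(...)                                                        \
    long RequiresTag = __LINE__,                                             \
         typename std::enable_if<(RequiresTag == __LINE__ && (__VA_ARGS__)), \
                                 int>::type = 0

template <class T, REQUIRES(std::is_integral<T>{})>
const char* kind(T) { return "integral"; }

template <class T, REQUIRES(std::is_floating_point<T>{})>
const char* kind(T) { return "floating point"; }

int main() { std::printf("%s, %s\n", kind(1), kind(1.0)); } // integral, floating point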
@@ -34,6 +34,7 @@ add_library(migraphx_device
device/contiguous.cpp
device/logsoftmax.cpp
device/softmax.cpp
device/sigmoid.cpp
device/convert.cpp
device/mul.cpp
device/concat.cpp
@@ -78,7 +79,6 @@ add_library(migraphx_gpu
batchnorm.cpp
write_literals.cpp
rocblas.cpp
-sigmoid.cpp
abs.cpp
elu.cpp
pad.cpp
......
@@ -245,8 +245,7 @@ void reduce_standard_impl(hipStream_t stream,
T init,
Input read_input,
Output read_output,
-std::size_t relements,
-std::size_t stride)
+std::size_t relements)
{
hip_visit_all(result, arg)([&](auto output, auto input) {
auto nelements = result.get_shape().elements();
@@ -255,7 +254,7 @@
const std::size_t block_size = compute_block_size(relements, max_block_size);
gs_launch(stream, nelements * block_size, block_size)([=](auto i, auto idx) __device__ {
const auto out_idx = i / block_size;
-const auto base_idx = out_idx * stride;
+const auto base_idx = out_idx * relements;
auto r = block_reduce<max_block_size>(idx, op, init, relements, [&](auto j) __device__ {
return read_input(input.data()[base_idx + j]);
});
@@ -276,25 +275,15 @@ void reduce(hipStream_t stream,
{
auto&& output_shape = result.get_shape();
auto&& input_shape = arg.get_shape();
assert(output_shape.lens().size() == input_shape.lens().size());
if(input_shape.standard() and output_shape.standard() and
output_shape.lens().back() != input_shape.lens().back() and
std::equal(output_shape.lens().begin(),
std::prev(output_shape.lens().end()),
input_shape.lens().begin()))
{
-std::size_t stride = std::accumulate(input_shape.strides().begin(),
-input_shape.strides().end(),
-1,
-std::multiplies<size_t>());
-reduce_standard_impl(stream,
-result,
-arg,
-op,
-init,
-read_input,
-read_output,
-input_shape.lens().back(),
-stride);
+reduce_standard_impl(
+stream, result, arg, op, init, read_input, read_output, input_shape.lens().back());
}
else
{
......
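The change above fixes the fast path taken when both shapes are standard (packed, row-major) and only the trailing axis is reduced: the inputs feeding output element out_idx are the contiguous run beginning at out_idx * relements, so the old stride, computed as the product of all input strides, was wrong as well as unnecessary. A CPU analogue of the corrected indexing (a sketch, not the GPU kernel):

#include <cstddef>
#include <numeric>
#include <vector>

// Sum-reduce the last axis of a packed row-major tensor flattened into
// `input`, where the last axis has length `relements`.
std::vector<float> reduce_last_axis_sum(const std::vector<float>& input, std::size_t relements)
{
    std::vector<float> output(input.size() / relements);
    for(std::size_t out_idx = 0; out_idx < output.size(); out_idx++)
    {
        const std::size_t base_idx = out_idx * relements; // was out_idx * stride
        output[out_idx] = std::accumulate(
            input.begin() + base_idx, input.begin() + base_idx + relements, 0.0f);
    }
    return output;
}

int main()
{
    // A 2x3 input reduced over the last axis gives {6, 15}.
    std::vector<float> in{1, 2, 3, 4, 5, 6};
    auto out = reduce_last_axis_sum(in, 3);
    return out[0] == 6.0f and out[1] == 15.0f ? 0 : 1;
}

The new test_reduce_mean2 at the bottom of this diff reduces {1, 128, 768} over its last axis, which exercises exactly this path.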
#include <migraphx/gpu/device/sigmoid.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return 1.f / (1.f + ::exp(to_hip_type(-x))); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SIGMOID_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SIGMOID_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void sigmoid(hipStream_t stream, const argument& result, const argument& arg);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_SIGMOID_HPP
#define MIGRAPHX_GUARD_RTGLIB_SIGMOID_HPP
#include <migraphx/shape.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/sigmoid.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
-struct miopen_sigmoid
+struct hip_sigmoid : unary_device<hip_sigmoid, device::sigmoid>
{
-shared<activation_descriptor> ad;
-template <class Self, class F>
-static auto reflect(Self& self, F f)
-{
-return gpu::reflect(self.ad.get(), f);
-}
-std::string name() const { return "gpu::sigmoid"; }
-shape compute_shape(const std::vector<shape>& inputs) const;
-argument
-compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
-{
-return shapes.size() - 1;
-}
};
} // namespace gpu
......
@@ -86,7 +86,6 @@ struct miopen_apply
void init()
{
this->last = instruction::get_output_alias(std::prev(prog->end()));
-add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
add_miopen_simple_op<miopen_abs>("abs", make_abs);
add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
@@ -116,6 +115,7 @@
add_generic_op<hip_sqdiff>("sqdiff");
add_generic_op<hip_relu>("relu");
add_generic_op<hip_sign>("sign");
add_generic_op<hip_sigmoid>("sigmoid");
add_extend_op<miopen_gemm, op::dot>("dot");
add_extend_op<rocblas_quant_gemm, op::quant_dot>("quant_dot");
......
@@ -8,51 +8,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
-template <class... Ts>
-rocblas_status generic_rocblas_gemm_ex(Ts&&... xs)
-{
-return rocblas_gemm_ex(std::forward<Ts>(xs)...);
-}
-template <class... Ts>
-rocblas_status generic_rocblas_batched_gemm_ex(Ts&&... xs)
-{
-return rocblas_gemm_strided_batched_ex(std::forward<Ts>(xs)...);
-}
-template <class T>
-struct compute_rocblas_type
-{
-using type = T;
-};
-template <class T>
-struct compute_rocblas_type<const T>
-{
-using type = const typename compute_rocblas_type<T>::type;
-};
-template <>
-struct compute_rocblas_type<half>
-{
-using type = rocblas_half;
-};
-template <class T>
-using rb_type = typename compute_rocblas_type<T>::type;
-template <class T>
-rb_type<T> to_rocblas_type(T x)
-{
-return reinterpret_cast<const rb_type<T>&>(x);
-}
-template <class T>
-rb_type<T>* to_rocblas_type(T* x)
-{
-return reinterpret_cast<rb_type<T>*>(x);
-}
shape rocblas_quant_gemm::compute_shape(const std::vector<shape>& inputs) const
{
std::vector<shape> in_shapes(inputs);
@@ -102,13 +57,13 @@ argument rocblas_quant_gemm::compute(context& ctx,
auto a_lens = args[0].get_shape().lens();
auto b_lens = args[1].get_shape().lens();
output_shape.visit_type([&](auto as) {
-auto alpha_r = to_rocblas_type(as(op.alpha));
-auto beta_r = to_rocblas_type(as(beta));
+auto alpha_r = as(op.alpha);
+auto beta_r = as(beta);
auto out_lens = output_shape.lens();
rocblas_int m = out_lens[dim_0];
rocblas_int n = out_lens[dim_1];
rocblas_int k = args[0].get_shape().lens()[dim_1];
-auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+auto to_pointer = [&](auto&& arg) { return as.from(arg.data()); };
assert(k % 4 == 0);
auto num_matrices = std::accumulate(
@@ -119,36 +74,36 @@
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
-generic_rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
-transb ? rocblas_operation_transpose : rocblas_operation_none,
-transa ? rocblas_operation_transpose : rocblas_operation_none,
-n,
-m,
-k,
-&alpha_r,
-to_pointer(args.at(1)),
-rocblas_datatype_i8_r,
-ldb,
-to_pointer(args.at(0)),
-rocblas_datatype_i8_r,
-lda,
-&beta_r,
-to_pointer(args[2]),
-rocblas_datatype_i32_r,
-ldc,
-is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
-rocblas_datatype_i32_r,
-ldc,
-rocblas_datatype_i32_r,
-rocblas_gemm_algo_standard,
-0,
-0,
-nullptr,
-nullptr);
+rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
+transb ? rocblas_operation_transpose : rocblas_operation_none,
+transa ? rocblas_operation_transpose : rocblas_operation_none,
+n,
+m,
+k,
+&alpha_r,
+to_pointer(args.at(1)),
+rocblas_datatype_i8_r,
+ldb,
+to_pointer(args.at(0)),
+rocblas_datatype_i8_r,
+lda,
+&beta_r,
+to_pointer(args[2]),
+rocblas_datatype_i32_r,
+ldc,
+is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
+rocblas_datatype_i32_r,
+ldc,
+rocblas_datatype_i32_r,
+rocblas_gemm_algo_standard,
+0,
+0,
+nullptr,
+nullptr);
}
else
{
-generic_rocblas_batched_gemm_ex(
+rocblas_gemm_strided_batched_ex(
ctx.get_stream().get_rocblas(),
transb ? rocblas_operation_transpose : rocblas_operation_none,
transa ? rocblas_operation_transpose : rocblas_operation_none,
......
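Two notes on the rocBLAS changes: the quantized GEMM only touches int8 and int32 data (rocblas_datatype_i8_r / rocblas_datatype_i32_r), so the removed half <-> rocblas_half shims and generic_* wrappers had nothing left to do here, and direct rocblas_gemm_ex / rocblas_gemm_strided_batched_ex calls suffice. As for the column-major comment: rocBLAS expects column-major matrices, and a row-major m x k buffer reinterpreted column-major is exactly the k x m transpose, so computing C^T = B^T * A^T yields row-major C with no data movement; hence args[1] is passed as A and args[0] as B. A small self-contained check of that identity (plain loops standing in for rocBLAS):

#include <cstdio>

// Column-major GEMM: C(m x n) = A(m x k) * B(k x n); leading dimensions
// are the column strides (number of rows for packed storage).
void gemm_col_major(int m, int n, int k, const int* A, int lda, const int* B, int ldb, int* C, int ldc)
{
    for(int j = 0; j < n; j++)
        for(int i = 0; i < m; i++)
        {
            int acc = 0;
            for(int p = 0; p < k; p++)
                acc += A[i + p * lda] * B[p + j * ldb];
            C[i + j * ldc] = acc;
        }
}

int main()
{
    // Row-major A (2x3) and B (3x2); we want row-major C = A * B (2x2).
    const int A[6] = {1, 2, 3, 4, 5, 6};
    const int B[6] = {7, 8, 9, 10, 11, 12};
    int C[4]       = {0, 0, 0, 0};
    // A row-major buffer read column-major is the transpose, so compute
    // C^T = B^T * A^T: pass B first and swap m/n, just like the diff does.
    gemm_col_major(2, 2, 3, B, 2, A, 3, C, 2);
    std::printf("%d %d\n%d %d\n", C[0], C[1], C[2], C[3]); // 58 64 / 139 154
}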
(Deleted file: with miopen_sigmoid gone and sigmoid.cpp dropped from the build above, the MIOpen-based implementation below is removed; sigmoid now lowers to the hip_sigmoid device op.)
-#include <migraphx/gpu/sigmoid.hpp>
-#include <migraphx/gpu/context.hpp>
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-shape miopen_sigmoid::compute_shape(const std::vector<shape>& inputs) const
-{
-check_shapes{inputs, *this}.has(2).not_broadcasted();
-return inputs.at(1);
-}
-argument miopen_sigmoid::compute(context& ctx,
-const shape& output_shape,
-const std::vector<argument>& args) const
-{
-float alpha = 1;
-float beta = 0;
-auto x_desc = make_tensor(args[0].get_shape());
-auto y_desc = make_tensor(output_shape);
-miopenActivationForward(ctx.get_stream().get_miopen(),
-ad.get(),
-&alpha,
-x_desc.get(),
-args[0].implicit(),
-&beta,
-y_desc.get(),
-args[1].implicit());
-return args[1];
-}
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
@@ -3810,6 +3810,18 @@ struct test_reduce_mean : verify_program<test_reduce_mean>
};
};
struct test_reduce_mean2 : verify_program<test_reduce_mean2>
{
migraphx::program create_program() const
{
migraphx::program p;
migraphx::shape s{migraphx::shape::float_type, {1, 128, 768}};
auto x = p.add_parameter("x", s);
p.add_instruction(migraphx::op::reduce_mean{{2}}, x);
return p;
};
};
struct test_reduce_mean_int : verify_program<test_reduce_mean_int>
{
migraphx::program create_program() const
......