Commit a4f8d30b authored by Paul's avatar Paul
Browse files

Merge branch 'develop' into layout-nhwc

parents d12efcd2 a8d86615
......@@ -9,6 +9,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
std::vector<char> read_buffer(const std::string& filename);
std::string read_string(const std::string& filename);
void write_buffer(const std::string& filename, const char* buffer, std::size_t size);
void write_buffer(const std::string& filename, const std::vector<char>& buffer);
......
File mode changed from 100644 to 100755
......@@ -39,7 +39,7 @@ struct flatten
std::string name() const { return "flatten"; }
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
check_shapes{inputs, *this}.has(1).standard();
auto&& lens = inputs.front().lens();
auto x =
std::accumulate(lens.begin(), lens.begin() + axis, std::size_t{1}, std::multiplies<>{});
......
#ifndef MIGRAPHX_GUARD_OPERATORS_SCATTER_HPP
#define MIGRAPHX_GUARD_OPERATORS_SCATTER_HPP
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct scatter
{
int64_t axis = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.axis, "axis"));
}
value attributes() const
{
value normalize;
normalize["axis"] = value::array{normalize_attribute::include_min};
return {{"normalize_axes", normalize}};
}
std::string name() const { return "scatter"; }
shape normalize_compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3).standard();
return inputs.front();
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// max dimension in axis
auto axis_dim_size = output_shape.lens()[axis];
visit_all(result, args[0], args[2])([&](auto output, auto data, auto update) {
std::copy(data.begin(), data.end(), output.begin());
args[1].visit([&](auto indices) {
auto ind_s = indices.get_shape();
shape_for_each(ind_s, [&](const auto& idx) {
auto out_idx = idx;
auto index = indices[ind_s.index(idx)];
index = (index < 0) ? index + axis_dim_size : index;
out_idx[axis] = index;
output[output_shape.index(out_idx)] = update[ind_s.index(idx)];
});
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -81,6 +81,7 @@
#include <migraphx/op/round.hpp>
#include <migraphx/op/rsqrt.hpp>
#include <migraphx/op/scalar.hpp>
#include <migraphx/op/scatter.hpp>
#include <migraphx/op/sigmoid.hpp>
#include <migraphx/op/sign.hpp>
#include <migraphx/op/sinh.hpp>
......
......@@ -169,6 +169,12 @@ void copy(Range&& r, Iterator it)
std::copy(r.begin(), r.end(), it);
}
template <class Range, class Iterator, class F>
void transform(Range&& r, Iterator it, F f)
{
std::transform(r.begin(), r.end(), it, f);
}
template <class Range>
auto reverse(Range& r)
{
......
......@@ -393,6 +393,31 @@ struct value
return result;
}
template <class To>
To get(const std::string& pkey, const To& default_value) const
{
const auto* v = find(pkey);
if(v == this->end())
return default_value;
return v->to<To>();
}
template <class To>
std::vector<To> get(const std::string& pkey, const std::vector<To>& default_value) const
{
const auto* v = find(pkey);
if(v == this->end())
return default_value;
return v->to_vector<To>();
}
template <class To>
std::vector<To> get(const std::string& pkey,
const std::initializer_list<To>& default_value) const
{
return get<std::vector<To>>(pkey, default_value);
}
friend bool operator==(const value& x, const value& y);
friend bool operator!=(const value& x, const value& y);
friend bool operator<(const value& x, const value& y);
......
......@@ -42,14 +42,17 @@ struct parse_gemm : op_parser<parse_gemm>
// swap the last two elements
std::swap(*perm.rbegin(), *(perm.rbegin() + 1));
auto l1 = args[0];
auto l1 = args[0];
auto dot_type = l1->get_shape().type();
if(alpha != 1.0f)
{
auto alpha_literal = info.add_literal(alpha);
auto alpha_l1 = info.add_broadcastable_binary_op("mul", alpha_literal, l1);
l1 = info.add_instruction(make_op("convert", {{"target_type", l1->get_shape().type()}}),
alpha_l1);
l1 = info.add_broadcastable_binary_op("mul", alpha_literal, l1);
if(l1->get_shape().type() != dot_type)
{
l1 = info.add_instruction(make_op("convert", {{"target_type", dot_type}}), l1);
}
}
l1 = (transa) ? info.add_instruction(make_op("transpose", {{"dims", perm}}), l1) : l1;
......@@ -69,13 +72,16 @@ struct parse_gemm : op_parser<parse_gemm>
l3 = info.add_instruction(
make_op("multibroadcast", {{"output_lens", out_lens}}), args[2]);
}
auto beta_literal = info.add_literal(beta);
auto beta_broadcast = info.add_instruction(
make_op("multibroadcast", {{"output_lens", out_lens}}), beta_literal);
l3 = info.add_instruction(make_op("mul"), l3, beta_broadcast);
auto beta_literal = info.add_literal(beta);
auto beta_l3 = info.add_broadcastable_binary_op("mul", l3, beta_literal);
if(beta_l3->get_shape().type() != dot_type)
{
beta_l3 = info.add_instruction(make_op("convert", {{"target_type", dot_type}}),
beta_l3);
}
return info.add_instruction(
make_op("dot", {{"alpha", 1.0f}, {"beta", 1.0f}}), l1, l2, l3);
make_op("dot", {{"alpha", 1.0f}, {"beta", 1.0f}}), l1, l2, beta_l3);
}
}
......
......@@ -35,6 +35,8 @@ struct parse_generic_op : op_parser<parse_generic_op>
{"Reciprocal", "recip"},
{"Relu", "relu"},
{"Round", "round"},
{"Scatter", "scatter"},
{"ScatterElements", "scatter"},
{"Sigmoid", "sigmoid"},
{"Sign", "sign"},
{"Sin", "sin"},
......@@ -47,7 +49,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
bool needs_contiguous(const std::string& op_name) const
{
return contains({"gather"}, op_name);
return contains({"flatten", "gather", "scatter"}, op_name);
}
instruction_ref parse(const op_desc& opd,
......
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from .backend import is_compatible, prepare, run, supports_device
......@@ -82,9 +82,6 @@ class MIGraphXBackend(Backend):
elif isinstance(model, migraphx.program):
return MIGraphXBackendRep(model, cls._input_names)
elif isinstance(model, (str, bytes)):
for k, v in kwargs.items():
if hasattr(options, k):
setattr(options, k, v)
if device is not None and not cls.supports_device(device):
raise RuntimeError(
"Incompatible device expected '{0}', got '{1}'".format(
......
......@@ -73,6 +73,7 @@ add_library(migraphx_device
device/rnn_variable_seq_lens.cpp
device/round.cpp
device/rsqrt.cpp
device/scatter.cpp
device/sigmoid.cpp
device/sign.cpp
device/sin.cpp
......@@ -118,6 +119,7 @@ add_library(migraphx_gpu
code_object_op.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_pointwise.cpp
concat.cpp
convert.cpp
convolution.cpp
......@@ -145,8 +147,9 @@ add_library(migraphx_gpu
reverse.cpp
rnn_variable_seq_lens.cpp
rocblas.cpp
softmax.cpp
scatter.cpp
schedule_model.cpp
softmax.cpp
sync_device.cpp
target.cpp
write_literals.cpp
......@@ -204,6 +207,7 @@ register_migraphx_gpu_ops(hip_
reverse
round
rsqrt
scatter
sigmoid
sign
sinh
......@@ -310,6 +314,8 @@ target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver)
rocm_install_targets(
TARGETS migraphx_gpu migraphx_device
INCLUDE
......
File mode changed from 100644 to 100755
......@@ -3,6 +3,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/compile_src.hpp>
#include <migraphx/process.hpp>
#include <migraphx/env.hpp>
#include <cassert>
namespace migraphx {
......@@ -21,6 +22,9 @@ bool is_hip_clang_compiler()
return result;
}
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
{
......@@ -41,9 +45,12 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
{
params += " --cuda-gpu-arch=" + arch;
params += " --cuda-device-only";
params += " -O3 ";
params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
}
if(enabled(MIGRAPHX_GPU_DEBUG{}))
params += " -DMIGRAPHX_DEBUG";
params += " -Wno-unused-command-line-argument -Wno-cuda-compat ";
params += MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_FLAGS);
......
......@@ -82,9 +82,12 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
});
srcs.push_back(src_file{fs::path{"main.cpp"},
std::make_pair(content.data(), content.data() + content.size())});
auto args_hpp = generate_args_hpp(options.inputs);
auto args_hpp =
generate_args_hpp(options.reduced_inputs.empty() ? options.inputs : options.reduced_inputs);
srcs.push_back(src_file{fs::path{"args.hpp"},
std::make_pair(args_hpp.data(), args_hpp.data() + args_hpp.size())});
options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
options.params += " -I.";
auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name());
if(cos.size() != 1)
......
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>
using namespace migraphx;
extern "C" {
__global__ void kernel(${params})
{
pointwise(${lambda}, ${args});
}
}
int main() {}
)__migraphx__";
std::string enum_params(std::size_t count, std::string param)
{
std::vector<std::string> items(count);
transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
return join_strings(items, ",");
}
std::size_t compute_global(std::size_t n, std::size_t local = 1024)
{
std::size_t groups = (n + local - 1) / local;
std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
return nglobal;
}
operation compile_pointwise(context&, const std::vector<shape>& inputs, const std::string& lambda)
{
hip_compile_options options;
options.global = compute_global(inputs.front().elements());
options.local = 1024;
options.inputs = inputs;
options.output = inputs.back();
options.reduced_inputs = reduce_dims(inputs);
auto src = interpolate_string(pointwise_kernel,
{{"params", enum_params(inputs.size(), "void * private_p")},
{"args", enum_params(inputs.size(), "private_p")},
{"lambda", lambda}});
return compile_hip_code_object(src, options);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -352,7 +352,8 @@ bool broadcastable(bool& divisible_by_4,
auto b_len = result.get_shape().lens()[b_idx];
auto b_stride = result.get_shape().strides()[b_idx];
assert(bshape.lens()[b_idx] == b_len);
if(b_len <= max_size and std::none_of(std::next(b_it), strides.end(), not_zero))
if(b_len <= max_size and std::none_of(std::next(b_it), strides.end(), not_zero) and
is_divisor_encodable(b_stride * b_len))
{
divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
......
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/scatter.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument scatter(
hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis)
{
auto ds = arg0.get_shape();
auto inds = arg1.get_shape();
auto axis_dim_size = ds.lens()[axis];
hip_visit_all(result, arg0, inds)([&](auto output, auto data, auto s1) {
auto* output_ptr = device_cast(output.data());
const auto* data_ptr = device_cast(data.data());
gs_launch(stream, ds.elements())([=](auto i) __device__ { output_ptr[i] = data_ptr[i]; });
hip_visit_all(arg1, arg2)([&](auto indices, auto update) {
const auto* upd_ptr = device_cast(update.data());
const auto* indices_ptr = device_cast(indices.data());
gs_launch(stream, inds.elements())([=](auto i) __device__ {
auto out_idx = s1.multi(i);
auto index = indices_ptr[i];
index = index < 0 ? index + axis_dim_size : index;
out_idx[axis] = index;
output[out_idx] = upd_ptr[i];
});
});
});
return result;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
add_executable(gpu-driver
action.cpp
compile_pointwise.cpp
main.cpp
parser.cpp
perf.cpp
run_op.cpp
)
target_include_directories(gpu-driver PRIVATE include)
target_link_libraries(gpu-driver PRIVATE migraphx_gpu)
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/errors.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
auto& action_map()
{
static std::unordered_map<std::string, action_function> m;
return m;
}
action_function get_action(const std::string& name)
{
if(action_map().count(name) == 0)
MIGRAPHX_THROW("Missing action: " + name);
return action_map().at(name);
}
void register_action(const std::string& name, const action_function& a) { action_map()[name] = a; }
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment