Commit 7e297b13 authored by Paul's avatar Paul
Browse files

Merge

parents 86ea5e91 aa7ff911
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument topk_smallest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
argument topk_largest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_WHERE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_WHERE_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void where(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1,
const argument& arg2);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
...@@ -14,7 +14,7 @@ namespace gpu { ...@@ -14,7 +14,7 @@ namespace gpu {
struct eliminate_workspace struct eliminate_workspace
{ {
std::string name() const { return "eliminate_workspace"; } std::string name() const { return "eliminate_workspace"; }
void apply(module& p) const; void apply(module& m) const;
}; };
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -16,7 +16,7 @@ struct fuse_ops ...@@ -16,7 +16,7 @@ struct fuse_ops
context* ctx = nullptr; context* ctx = nullptr;
bool fast_math = true; bool fast_math = true;
std::string name() const { return "gpu::fuse_ops"; } std::string name() const { return "gpu::fuse_ops"; }
void apply(module& p) const; void apply(module& m) const;
}; };
} // namespace gpu } // namespace gpu
......
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP #define MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP
#include <migraphx/errors.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/value.hpp>
#include <migraphx/shape.hpp> #include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp> #include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
...@@ -15,17 +18,24 @@ namespace gpu { ...@@ -15,17 +18,24 @@ namespace gpu {
struct context; struct context;
void blas_shape(const shape& s);
template <class Op> template <class Op>
struct rocblas_gemm struct rocblas_gemm
{ {
Op op; Op op;
float alpha = 1;
float beta = 0;
bool int8_x4_format = true; bool int8_x4_format = true;
bool compute_fp32 = false;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
return pack_join(migraphx::reflect(self.op, f), return pack_join(migraphx::reflect(self.op, f),
pack(f(self.int8_x4_format, "int8_x4_format"))); pack(f(self.alpha, "alpha"),
f(self.beta, "beta"),
f(self.int8_x4_format, "int8_x4_format")));
} }
std::string name() const std::string name() const
...@@ -42,8 +52,30 @@ struct rocblas_gemm ...@@ -42,8 +52,30 @@ struct rocblas_gemm
std::vector<shape> in_shapes(inputs); std::vector<shape> in_shapes(inputs);
in_shapes.pop_back(); in_shapes.pop_back();
check_shapes{in_shapes, *this}.not_broadcasted(); check_shapes{in_shapes, *this}.not_broadcasted();
batch_not_transposed(inputs[0].strides()); blas_shape(inputs[0]);
batch_not_transposed(inputs[1].strides()); blas_shape(inputs[1]);
// if gemm and add are fused
if(in_shapes.size() > 2)
{
auto cmat_shape = in_shapes.back();
in_shapes.pop_back();
blas_shape(cmat_shape);
auto op_out_shape = op.compute_shape(in_shapes);
if(cmat_shape.lens() != op_out_shape.lens())
{
MIGRAPHX_THROW(this->name() + " : dimension mismatch, operand C: {" +
to_string_range(cmat_shape.lens()) +
"}, cannot add to operand A * B: {" +
to_string_range(op_out_shape.lens()) + "}");
}
if(cmat_shape.type() != op_out_shape.type())
{
MIGRAPHX_THROW(this->name() + " : operand C type mismatch, operand C is of type: " +
to_string(cmat_shape.type()) +
", it must be: " + to_string(op_out_shape.type()));
}
return op_out_shape;
}
return op.compute_shape(in_shapes); return op.compute_shape(in_shapes);
} }
...@@ -51,30 +83,21 @@ struct rocblas_gemm ...@@ -51,30 +83,21 @@ struct rocblas_gemm
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{ {
gemm(ctx, output_shape, args, op.alpha, op.beta, int8_x4_format); if(this->name() == "gpu::gemm")
return args.back();
}
void batch_not_transposed(const std::vector<std::size_t>& strides) const
{
if(strides.size() <= 2)
return;
auto dim_0 = strides.size() - 2;
auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
if(std::all_of(batch.begin(), batch.end(), [&](auto i) { return (i < matrix_size); }))
{ {
MIGRAPHX_THROW("GPU_GEMM: matrix size and batch size {" + to_string_range(strides) + gemm(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32);
"} are transposed!");
} }
else
if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
return (i < j or i < matrix_size or j < matrix_size);
}) != batch.end())
{ {
MIGRAPHX_THROW("GPU_GEMM: batch size {" + to_string_range(strides) + gemm(ctx,
"} is transposed!"); output_shape,
args,
int32_t(alpha),
int32_t(beta),
int8_x4_format,
compute_fp32);
} }
return args.back();
} }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
......
...@@ -14,13 +14,15 @@ void gemm(context& ctx, ...@@ -14,13 +14,15 @@ void gemm(context& ctx,
const std::vector<argument>& args, const std::vector<argument>& args,
float alpha, float alpha,
float beta, float beta,
bool int8_x4_format); bool int8_x4_format,
bool compute_fp32);
void gemm(context& ctx, void gemm(context& ctx,
const shape& output_shape, const shape& output_shape,
const std::vector<argument>& args, const std::vector<argument>& args,
int32_t alpha, int32_t alpha,
int32_t beta, int32_t beta,
bool int8_x4_format); bool int8_x4_format,
bool compute_fp32);
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
#ifndef MIGRAPHX_GUARD_RTGLIB_LOOP_HPP
#define MIGRAPHX_GUARD_RTGLIB_LOOP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/loop.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_loop
{
op::loop op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::loop"; }
shape compute_shape(std::vector<shape> inputs, std::vector<module_ref> mods) const;
argument
compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args,
const std::vector<module_ref>& mods,
const std::function<std::vector<argument>(
module_ref&, const std::unordered_map<std::string, argument>&)>& run) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <miopen/miopen.h> #include <miopen/miopen.h>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <sstream>
#ifdef HAS_FIND_MODE_API #ifdef HAS_FIND_MODE_API
extern "C" miopenStatus_t miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, extern "C" miopenStatus_t miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc,
int findMode); int findMode);
...@@ -132,12 +134,16 @@ inline convolution_descriptor make_deconv(const T& op) ...@@ -132,12 +134,16 @@ inline convolution_descriptor make_deconv(const T& op)
inline pooling_descriptor make_pooling(const migraphx::op::pooling& op) inline pooling_descriptor make_pooling(const migraphx::op::pooling& op)
{ {
miopenPoolingMode_t mode; miopenPoolingMode_t mode;
if(op.mode == "max") if(op.mode == op::pooling_mode::max)
mode = miopenPoolingMax; mode = miopenPoolingMax;
else if(op.mode == "average") else if(op.mode == op::pooling_mode::average)
mode = miopenPoolingAverage; mode = miopenPoolingAverage;
else else
MIGRAPHX_THROW("Unknown mode for pooling: " + op.mode); {
std::stringstream ss("Unknown mode for pooling: ");
ss << op.mode;
MIGRAPHX_THROW(ss.str());
}
auto p = make_obj<pooling_descriptor>(&miopenCreatePoolingDescriptor); auto p = make_obj<pooling_descriptor>(&miopenCreatePoolingDescriptor);
int kdims = op.kdims(); int kdims = op.kdims();
......
#ifndef MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP
#include <migraphx/op/multinomial.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_multinomial
{
op::multinomial op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::multinomial"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP
#define MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/nonzero.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_nonzero
{
op::nonzero op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::nonzero"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
...@@ -40,9 +40,8 @@ struct hip_prefix_scan_sum : oper<hip_prefix_scan_sum> ...@@ -40,9 +40,8 @@ struct hip_prefix_scan_sum : oper<hip_prefix_scan_sum>
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{ {
if(op.exclusive or op.reverse) device::prefix_scan_sum(
MIGRAPHX_THROW("Exclusive and reverse scan not supported"); ctx.get_stream().get(), args[1], args[0], op.axis, op.exclusive, op.reverse);
device::prefix_scan_sum(ctx.get_stream().get(), args[1], args[0], op.axis);
return args[1]; return args[1];
} }
......
#ifndef MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
#define MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
struct prefuse_ops
{
std::string name() const { return "gpu::prefuse_ops"; }
void apply(module& m) const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP #define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#include <migraphx/shape.hpp> #include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/quant_convolution.hpp> #include <migraphx/op/quant_convolution.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
...@@ -14,6 +15,7 @@ struct context; ...@@ -14,6 +15,7 @@ struct context;
struct miopen_quant_convolution struct miopen_quant_convolution
{ {
op::quant_convolution op; op::quant_convolution op;
bool int8_x4_format = false;
shared<convolution_descriptor> cd; shared<convolution_descriptor> cd;
miopenConvFwdAlgorithm_t algo{}; miopenConvFwdAlgorithm_t algo{};
miopenHandle_t handle = nullptr; miopenHandle_t handle = nullptr;
...@@ -22,7 +24,8 @@ struct miopen_quant_convolution ...@@ -22,7 +24,8 @@ struct miopen_quant_convolution
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
// TODO: Add algo // TODO: Add algo
return op::quant_convolution::reflect(self.op, f); return pack_join(migraphx::reflect(self.op, f),
pack(f(self.int8_x4_format, "int8_x4_format")));
} }
std::string name() const { return "gpu::quant_convolution"; } std::string name() const { return "gpu::quant_convolution"; }
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp> #include <migraphx/reflect.hpp>
#include <migraphx/op/scatter.hpp> #include <migraphx/op/scatter_none.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
namespace migraphx { namespace migraphx {
...@@ -14,7 +14,9 @@ struct context; ...@@ -14,7 +14,9 @@ struct context;
struct hip_scatter struct hip_scatter
{ {
op::scatter op; // scatter_none is an exact replacement for previous op::scatter,
// renamed to match an Onnx option. Don't use base class op::scatter
op::scatter_none op;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
...@@ -22,7 +24,7 @@ struct hip_scatter ...@@ -22,7 +24,7 @@ struct hip_scatter
return migraphx::reflect(self.op, f); return migraphx::reflect(self.op, f);
} }
std::string name() const { return "gpu::scatter"; } std::string name() const { return "gpu::scatter_none"; }
shape compute_shape(std::vector<shape> inputs) const; shape compute_shape(std::vector<shape> inputs) const;
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
......
...@@ -17,9 +17,9 @@ struct schedule_model ...@@ -17,9 +17,9 @@ struct schedule_model
{ {
std::size_t streams = 0; std::size_t streams = 0;
std::size_t concurrency() const; std::size_t concurrency() const;
void sched(module& p, instruction_ref ins, std::size_t n) const; void sched(module& m, instruction_ref ins, std::size_t n) const;
void wait(module& p, instruction_ref ins, std::size_t wait_id) const; void wait(module& m, instruction_ref ins, std::size_t wait_id) const;
void record(module& p, instruction_ref ins, std::size_t wait_id) const; void record(module& m, instruction_ref ins, std::size_t wait_id) const;
std::size_t weight(const operation& op) const; std::size_t weight(const operation& op) const;
}; };
......
...@@ -15,7 +15,7 @@ namespace gpu { ...@@ -15,7 +15,7 @@ namespace gpu {
struct sync_device struct sync_device
{ {
std::string name() const { return "sync_device"; } std::string name() const { return "sync_device"; }
void apply(module& p) const; void apply(module& m) const;
}; };
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
#ifndef MIGRAPHX_GUARD_RTGLIB_TOPK_HPP
#define MIGRAPHX_GUARD_RTGLIB_TOPK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/topk.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_topk
{
op::topk op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "gpu::topk"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_WHERE_HPP
#define MIGRAPHX_GUARD_RTGLIB_WHERE_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/where.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_where : ternary_device<hip_where, device::where>
{
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4).same_dims();
auto s1 = inputs.at(1);
auto s2 = inputs.at(2);
if(s1 == s2 and s1.packed())
{
return s1;
}
else if(s1.packed() != s2.packed())
{
return s1.packed() ? s1 : s2;
}
else if(s1.broadcasted() != s2.broadcasted())
{
return s1.broadcasted() ? s2.with_lens(s1.lens()) : s1.with_lens(s1.lens());
}
else
{
return {s1.type(), s1.lens()};
}
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
...@@ -14,7 +14,7 @@ struct write_literals ...@@ -14,7 +14,7 @@ struct write_literals
context* ctx = nullptr; context* ctx = nullptr;
std::string name() const { return "gpu::write_literals"; } std::string name() const { return "gpu::write_literals"; }
void apply(module& p) const; void apply(module& m) const;
}; };
} // namespace gpu } // namespace gpu
......
...@@ -5,10 +5,25 @@ namespace migraphx { ...@@ -5,10 +5,25 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
shape pack_int8_shape(const shape& s)
{
if(s.type() != shape::int8_type)
{
MIGRAPHX_THROW("PACK_INT8_ARGS: only process int8_type");
}
auto lens = s.lens();
auto strides = s.strides();
lens[1] = (lens[1] + 3) / 4 * 4;
strides[0] = strides[1] * lens[1];
return {s.type(), lens, strides};
}
shape miopen_int8_conv_pack::compute_shape(const std::vector<shape>& inputs) const shape miopen_int8_conv_pack::compute_shape(const std::vector<shape>& inputs) const
{ {
check_shapes{{inputs.at(0)}, *this}.has(1).standard(); check_shapes{{inputs.at(0)}, *this}.has(1).standard();
return inputs.at(0); return pack_int8_shape(inputs.at(0));
} }
argument argument
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment