Commit 4a39a0f7 authored by Shucai Xiao
Browse files

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into add-conv_bn_add-test

parents 5564172e bb827865
#include <migraphx/gpu/driver/parser.hpp>
#include <migraphx/gpu/driver/action.hpp>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
// Print a fatal diagnostic and terminate the process.
// Writes to stderr (not stdout) so the message is visible even when stdout is
// redirected, and does not interleave with the tool's measurement output.
[[noreturn]] void error(const std::string& msg)
{
    std::cerr << msg << std::endl;
    std::abort();
}
// Build a shape from a value object: reads "lens", optional "strides", and
// "type" (defaulting to "float"). When no strides are given, a standard
// packed layout is used.
shape parser::parse_shape(const value& v) const
{
    auto dims    = get(v, "lens", std::vector<std::size_t>{});
    auto strides = get(v, "strides", std::vector<std::size_t>{});
    auto t       = shape::parse_type(get<std::string>(v, "type", "float"));
    if(not strides.empty())
        return shape{t, dims, strides};
    return shape{t, dims};
}
std::vector<shape> parser::parse_shapes(const value& v) const
{
std::vector<shape> result;
std::transform(
v.begin(), v.end(), std::back_inserter(result), [&](auto&& x) { return parse_shape(x); });
return result;
}
// Copy the optional "settings" entry of the input into this parser.
void parser::load_settings(const value& v)
{
    if(not v.contains("settings"))
        return;
    settings = v.at("settings");
}
// Dispatch each top-level entry of the input object to its registered action,
// after loading the (reserved) "settings" entry into a fresh parser.
void parser::process(const value& v)
{
    if(not v.is_object())
        error("Input is not an object");
    parser p{};
    p.load_settings(v);
    for(auto&& entry : v)
    {
        // "settings" is consumed by load_settings, not an action name.
        if(entry.get_key() != "settings")
            get_action(entry.get_key())(p, entry.without_key());
    }
}
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/context.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/time.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsigned long seed = 0)
{
std::vector<argument> args;
std::transform(shapes.begin(), shapes.end(), std::back_inserter(args), [&](auto& s) {
return to_gpu(generate_argument(s, seed++));
});
return args;
}
using milliseconds = std::chrono::duration<double, std::milli>;
// Measure the average wall-clock time, in milliseconds, of executing `op`
// on generated inputs over n timed iterations. One untimed warm-up run is
// performed first so one-time costs (finalization, caching) are excluded.
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n)
{
    // TODO: Use std::ref
    migraphx::context gctx = ctx;
    auto output            = op.compute_shape(inputs);
    op.finalize(gctx, output, inputs);
    auto args = generate_arguments(inputs);
    auto run  = [&] {
        op.compute(gctx, output, args);
        gctx.finish(); // synchronize so the full execution is measured
    };
    run(); // warm-up, not timed
    double total = 0.0;
    for(int i = 0; i < n; i++)
        total += time<milliseconds>(run);
    return total / n;
}
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
// Action that benchmarks a single GPU operator. The input value supplies
// "name" (operator name; "gpu::" is prepended when unqualified) and
// "inputs" (the argument shapes). Prints the average runtime in ms.
struct run_op : action<run_op>
{
    static void apply(const parser& p, const value& v)
    {
        context ctx;
        auto shapes  = p.parse_shapes(v.at("inputs"));
        auto op_name = v.at("name").to<std::string>();
        // Unqualified names default to the gpu namespace.
        if(not contains(op_name, "::"))
            op_name.insert(0, "gpu::");
        auto op          = make_op(op_name);
        const double avg = time_op(ctx, op, shapes);
        std::cout << op << ": " << avg << "ms" << std::endl;
    }
};
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -185,7 +185,7 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
if(conv.algo == miopenConvolutionFwdAlgoWinograd and wei.lens()[2] != 3 and
wei.lens()[3] != 3 and contains({{1, 1}}, op.stride))
return false;
return contains({{0, 0}, {1, 1}, {2, 2}}, op.padding) and
return contains({{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 2, 2, 2}}, op.padding) and
contains({{0, 0}, {1, 1}}, op.stride) and contains({{1, 1}}, op.dilation);
}
......@@ -568,7 +568,7 @@ struct miopen_conv_bias
{
check_shapes{inputs, *this}.has(5);
// TODO: Check slices
return op.compute_shape({inputs.at(0), inputs.at(1)});
return op.normalize_compute_shape({inputs.at(0), inputs.at(1)});
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
......@@ -615,7 +615,7 @@ struct miopen_conv_bias_relu
{
check_shapes{inputs, *this}.has(5);
// TODO: Check slices
return op.compute_shape({inputs.at(0), inputs.at(1)});
return op.normalize_compute_shape({inputs.at(0), inputs.at(1)});
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
......@@ -717,7 +717,7 @@ struct find_gemm_add
auto gemm = any_cast<rocblas_gemm<op::dot>>(gemm_ins->get_operator());
// Already fused gemm
if(not float_equal(gemm.op.beta, 0))
if(not float_equal(gemm.beta, 0))
return;
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto i) {
......@@ -738,7 +738,7 @@ struct find_gemm_add
inputs.push_back(copy_ins);
inputs.push_back(copy_ins);
gemm.op.beta = 1;
gemm.beta = 1;
p.replace_instruction(ins, gemm, inputs);
}
};
......
......@@ -37,8 +37,12 @@ R rocblas_invoke(R (*f)(Ts...), Us... xs)
}
template <class T>
void gemm_impl(
context& ctx, const shape& output_shape, const std::vector<argument>& args, T alpha, T beta)
void gemm_impl(context& ctx,
const shape& output_shape,
const std::vector<argument>& args,
T alpha,
T beta,
bool int8_x4_format)
{
bool transa = args[0].get_shape().transposed();
bool transb = args[1].get_shape().transposed();
......@@ -62,6 +66,14 @@ void gemm_impl(
}
auto compute_type = output_type;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags flag =
int8_x4_format ? rocblas_gemm_flags_pack_int8x4 : rocblas_gemm_flags_none;
#else
(void)int8_x4_format;
int flag = 0;
#endif
auto a_lens = args[0].get_shape().lens();
auto b_lens = args[1].get_shape().lens();
output_shape.visit_type([&](auto as) {
......@@ -72,7 +84,7 @@ void gemm_impl(
rocblas_int n = out_lens[dim_1];
rocblas_int k = args[0].get_shape().lens()[dim_1];
auto to_pointer = [&](auto&& arg) { return as.from(arg.data()); };
if(args[0].get_shape().type() == shape::int8_type and (k % 4) != 0)
if(args[0].get_shape().type() == shape::int8_type and (k % 4) != 0 and int8_x4_format)
{
MIGRAPHX_THROW("ROCBLAS_GEMM: k size of int8 type input must be mutlple of 4!");
}
......@@ -109,11 +121,7 @@ void gemm_impl(
compute_type,
rocblas_gemm_algo_standard,
0,
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags_pack_int8x4);
#else
0);
#endif
flag);
}
else
{
......@@ -146,11 +154,7 @@ void gemm_impl(
compute_type,
rocblas_gemm_algo_standard,
0,
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags_pack_int8x4);
#else
0);
#endif
flag);
}
});
}
......@@ -159,18 +163,20 @@ void gemm(context& ctx,
const shape& output_shape,
const std::vector<argument>& args,
float alpha,
float beta)
float beta,
bool int8_x4_format)
{
gemm_impl(ctx, output_shape, args, alpha, beta);
gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format);
}
void gemm(context& ctx,
const shape& output_shape,
const std::vector<argument>& args,
int32_t alpha,
int32_t beta)
int32_t beta,
bool int8_x4_format)
{
gemm_impl(ctx, output_shape, args, alpha, beta);
gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format);
}
} // namespace gpu
......
......@@ -169,12 +169,26 @@ void gpu_copy(context& ctx, const argument& src, const argument& dst)
void copy_to_gpu(context& ctx, const argument& src, const argument& dst)
{
gpu_copy(ctx, register_on_gpu(src), dst);
if(src.get_shape() == dst.get_shape() and dst.get_shape().packed())
{
hip_async_copy(ctx, src, dst, hipMemcpyHostToDevice);
}
else
{
gpu_copy(ctx, register_on_gpu(src), dst);
}
}
void copy_from_gpu(context& ctx, const argument& src, const argument& dst)
{
gpu_copy(ctx, src, register_on_gpu(dst));
if(src.get_shape() == dst.get_shape() and dst.get_shape().packed())
{
hip_async_copy(ctx, src, dst, hipMemcpyDeviceToHost);
}
else
{
gpu_copy(ctx, src, register_on_gpu(dst));
}
}
argument get_preallocation(context& ctx, const std::string& id)
......
......@@ -14,6 +14,7 @@ struct gpu_allocation_model
std::string name() const;
std::string copy() const;
operation allocate(const shape& s) const;
operation preallocate(const shape& s, const std::string& id) const;
};
} // namespace gpu
......
......@@ -51,6 +51,7 @@ struct code_object_op
os << "symbol_name=" << op.symbol_name << ",";
os << "global=" << op.global << ",";
os << "local=" << op.local << ",";
os << "]";
return os;
}
};
......
......@@ -14,8 +14,9 @@ struct hip_compile_options
std::size_t local;
std::vector<shape> inputs;
shape output;
std::string kernel_name = "kernel";
std::string params = "";
std::string kernel_name = "kernel";
std::string params = "";
std::vector<shape> reduced_inputs = {};
};
operation compile_hip_code_object(const std::string& content, hip_compile_options options);
......
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
operation
compile_pointwise(context& ctx, const std::vector<shape>& inputs, const std::string& lambda);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void fill(hipStream_t stream, const argument& result, unsigned long val);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void multinomial(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#define MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void prefix_scan_sum(hipStream_t stream, const argument& result, const argument& arg, int32_t axis);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REVERSE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REVERSE_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
reverse(hipStream_t stream, argument result, argument arg1, const std::vector<int64_t>& axes);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SCATTER_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument scatter(
hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument topk_smallest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
argument topk_largest(hipStream_t stream,
const argument& val_res,
const argument& ind_res,
const argument& arg,
int64_t k,
int64_t axis);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_WHERE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_WHERE_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void where(hipStream_t stream,
const argument& result,
const argument& arg0,
const argument& arg1,
const argument& arg2);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#include <migraphx/config.hpp>
#include <string>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
std::string get_device_name();
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP
#include <migraphx/errors.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/value.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
......@@ -19,11 +22,17 @@ template <class Op>
struct rocblas_gemm
{
Op op;
float alpha = 1;
float beta = 0;
bool int8_x4_format = true;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
return pack_join(migraphx::reflect(self.op, f),
pack(f(self.alpha, "alpha"),
f(self.beta, "beta"),
f(self.int8_x4_format, "int8_x4_format")));
}
std::string name() const
......@@ -42,14 +51,25 @@ struct rocblas_gemm
check_shapes{in_shapes, *this}.not_broadcasted();
batch_not_transposed(inputs[0].strides());
batch_not_transposed(inputs[1].strides());
std::size_t kdim = inputs[0].lens().size() - 1;
// k be multiple of 4
if(op.name() == "quant_dot" && (inputs[0].lens()[kdim] % 4) != 0)
// if gemm and add are fused
if(not float_equal(beta, 0))
{
MIGRAPHX_THROW("GPU_GEMM: size of A {" + to_string_range(inputs[0].lens()) +
"} and B {" + to_string_range(inputs[1].lens()) +
"} must be multiple of 4 for int8 type");
auto cmat_shape = in_shapes.back();
in_shapes.pop_back();
auto op_out_shape = op.compute_shape(in_shapes);
if(cmat_shape.lens() != op_out_shape.lens())
{
MIGRAPHX_THROW(this->name() + " : dimension mismatch, operand C: {" +
to_string_range(cmat_shape.lens()) +
"}, cannot add to operand A * B: {" +
to_string_range(op_out_shape.lens()) + "}");
}
if(cmat_shape.type() != op_out_shape.type())
{
MIGRAPHX_THROW(this->name() + " : operand C type mismatch, operand C is of type: " +
to_string(cmat_shape.type()) +
", it must be: " + to_string(op_out_shape.type()));
}
}
return op.compute_shape(in_shapes);
......@@ -58,7 +78,14 @@ struct rocblas_gemm
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{
gemm(ctx, output_shape, args, op.alpha, op.beta);
if(this->name() == "gpu::gemm")
{
gemm(ctx, output_shape, args, alpha, beta, int8_x4_format);
}
else
{
gemm(ctx, output_shape, args, int32_t(alpha), int32_t(beta), int8_x4_format);
}
return args.back();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment