"...zh_cn/git@developer.sourcefind.cn:guobj/qwen_lmdeploy.git" did not exist on "7283781e605bf29fb075bfaf12748e11d99e16bb"
Commit 9747cc44 authored by Paul's avatar Paul
Browse files

Merge branch 'bert-opt2' into bert-opt3

parents 48dbbd11 db0301d7
...@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR ...@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
RUN PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@02078ce236ad90e3aec04c0c770ef5bfc99e49c2 RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@26a4b3cfc0a1a15181490f24ae461608fef1b04e -DBUILD_MIXR_TARGET=On
ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
......
...@@ -349,25 +349,27 @@ match::matcher_result find_match(module& modl, M&& m) ...@@ -349,25 +349,27 @@ match::matcher_result find_match(module& modl, M&& m)
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MATCHES) MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MATCHES)
/// Find matches for an instruction in the module /// Find matches for an instruction in the module
template <class... Ms> template <class Mod, class... Ms>
void find_matches(module& mod, instruction_ref ins, Ms&&... ms) void find_matches(Mod& mod, instruction_ref ins, Ms&&... ms)
{ {
#if !defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5 #if !defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5
const const
#endif #endif
bool trace = enabled(MIGRAPHX_TRACE_MATCHES{}); int trace = value_of(MIGRAPHX_TRACE_MATCHES{});
bool match = false; bool match = false;
each_args( each_args(
[&](auto&& m) { [&](auto&& m) {
if(match) if(match)
return; return;
auto r = match_instruction(mod, ins, m.matcher()); if(trace > 1)
if(r.result == mod.end()) std::cout << "Match: " << get_type_name(m) << std::endl;
auto r = match_instruction(get_module(mod), ins, m.matcher());
if(r.result == get_module(mod).end())
return; return;
if(trace) if(trace > 0)
{ {
std::cout << "Matched by " << get_type_name(m) << std::endl; std::cout << "Matched by " << get_type_name(m) << std::endl;
mod.debug_print(ins); get_module(mod).debug_print(ins);
} }
m.apply(mod, r); m.apply(mod, r);
match = true; match = true;
...@@ -376,10 +378,10 @@ void find_matches(module& mod, instruction_ref ins, Ms&&... ms) ...@@ -376,10 +378,10 @@ void find_matches(module& mod, instruction_ref ins, Ms&&... ms)
} }
/// Find matches in a module /// Find matches in a module
template <class... Ms> template <class Mod, class... Ms>
void find_matches(module& mod, Ms&&... ms) void find_matches(Mod& mod, Ms&&... ms)
{ {
for(auto ins : iterator_for(mod)) for(auto ins : iterator_for(get_module(mod)))
{ {
find_matches(mod, ins, ms...); find_matches(mod, ins, ms...);
} }
......
...@@ -124,7 +124,7 @@ struct module ...@@ -124,7 +124,7 @@ struct module
std::unordered_map<instruction_ref, instruction_ref> map_ins = {}); std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
std::vector<instruction_ref> std::vector<instruction_ref>
add_instructions(module_ref m, add_instructions(const_module_ref m,
std::unordered_map<instruction_ref, instruction_ref> map_ins = {}); std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
std::vector<instruction_ref> std::vector<instruction_ref>
...@@ -139,7 +139,7 @@ struct module ...@@ -139,7 +139,7 @@ struct module
std::vector<instruction_ref> std::vector<instruction_ref>
insert_instructions(instruction_ref ins, insert_instructions(instruction_ref ins,
module_ref m, const_module_ref m,
std::unordered_map<instruction_ref, instruction_ref> map_ins = {}); std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
std::vector<instruction_ref> std::vector<instruction_ref>
...@@ -164,6 +164,10 @@ struct module ...@@ -164,6 +164,10 @@ struct module
instruction_ref replace_return(std::vector<instruction_ref> args); instruction_ref replace_return(std::vector<instruction_ref> args);
instruction_ref insert_literal(instruction_ref ins, literal l);
instruction_ref insert_parameter(instruction_ref ins, std::string name, shape s);
std::vector<std::string> get_parameter_names() const; std::vector<std::string> get_parameter_names() const;
shape get_parameter_shape(std::string name) const; shape get_parameter_shape(std::string name) const;
...@@ -227,6 +231,8 @@ struct module ...@@ -227,6 +231,8 @@ struct module
std::unique_ptr<module_impl> impl; std::unique_ptr<module_impl> impl;
}; };
inline module& get_module(module& m) { return m; }
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -32,7 +32,8 @@ namespace migraphx { ...@@ -32,7 +32,8 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
struct module; struct module;
using module_ref = module*; using module_ref = module*;
using const_module_ref = const module*;
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -198,6 +198,12 @@ void transform(Range&& r, Iterator it, F f) ...@@ -198,6 +198,12 @@ void transform(Range&& r, Iterator it, F f)
std::transform(r.begin(), r.end(), it, f); std::transform(r.begin(), r.end(), it, f);
} }
template <class Range1, class Range2, class Iterator, class F>
void transform(Range1&& r1, Range2&& r2, Iterator it, F f)
{
std::transform(r1.begin(), r1.end(), r2.begin(), it, f);
}
template <class Range> template <class Range>
auto reverse(Range& r) auto reverse(Range& r)
{ {
......
...@@ -191,6 +191,10 @@ struct shape ...@@ -191,6 +191,10 @@ struct shape
std::size_t size(std::size_t n = 1) const { return sizeof(type) * n; } std::size_t size(std::size_t n = 1) const { return sizeof(type) * n; }
auto is_integral() const { return std::is_integral<type>{}; }
auto is_signed() const { return std::is_signed<type>{}; }
auto is_unsigned() const { return std::is_unsigned<type>{}; }
template <class U> template <class U>
type* from(U* buffer, std::size_t n = 0) const type* from(U* buffer, std::size_t n = 0) const
{ {
......
...@@ -399,7 +399,8 @@ module::add_instructions(const std::vector<instruction_ref>& instructions, ...@@ -399,7 +399,8 @@ module::add_instructions(const std::vector<instruction_ref>& instructions,
} }
std::vector<instruction_ref> std::vector<instruction_ref>
module::add_instructions(module_ref m, std::unordered_map<instruction_ref, instruction_ref> map_ins) module::add_instructions(const_module_ref m,
std::unordered_map<instruction_ref, instruction_ref> map_ins)
{ {
return this->insert_instructions(this->end(), m, std::move(map_ins)); return this->insert_instructions(this->end(), m, std::move(map_ins));
} }
...@@ -420,8 +421,10 @@ module::insert_instructions(instruction_ref ins, ...@@ -420,8 +421,10 @@ module::insert_instructions(instruction_ref ins,
return insert_generic_instructions(*this, ins, instructions, std::move(map_ins)); return insert_generic_instructions(*this, ins, instructions, std::move(map_ins));
} }
std::vector<instruction_ref> module::insert_instructions( std::vector<instruction_ref>
instruction_ref ins, module_ref m, std::unordered_map<instruction_ref, instruction_ref> map_ins) module::insert_instructions(instruction_ref ins,
const_module_ref m,
std::unordered_map<instruction_ref, instruction_ref> map_ins)
{ {
return insert_generic_instructions(*this, ins, iterator_for(*m), std::move(map_ins)); return insert_generic_instructions(*this, ins, iterator_for(*m), std::move(map_ins));
} }
...@@ -436,11 +439,7 @@ module::insert_instructions(instruction_ref ins, ...@@ -436,11 +439,7 @@ module::insert_instructions(instruction_ref ins,
return insert_generic_instructions(*this, ins, iterator_for(r), std::move(map_ins)); return insert_generic_instructions(*this, ins, iterator_for(r), std::move(map_ins));
} }
instruction_ref module::add_literal(literal l) instruction_ref module::add_literal(literal l) { return insert_literal(begin(), std::move(l)); }
{
impl->emplace_front(std::move(l));
return impl->instructions.begin();
}
instruction_ref module::add_outline(const shape& s) instruction_ref module::add_outline(const shape& s)
{ {
...@@ -450,10 +449,7 @@ instruction_ref module::add_outline(const shape& s) ...@@ -450,10 +449,7 @@ instruction_ref module::add_outline(const shape& s)
instruction_ref module::add_parameter(std::string name, shape s) instruction_ref module::add_parameter(std::string name, shape s)
{ {
assert(get_parameter_shape(name) == shape{}); return insert_parameter(begin(), std::move(name), std::move(s));
impl->push_front({builtin::param{std::move(name), impl->nparams}, std::move(s), {}});
impl->nparams++;
return impl->instructions.begin();
} }
instruction_ref module::add_return(std::vector<instruction_ref> args) instruction_ref module::add_return(std::vector<instruction_ref> args)
...@@ -466,6 +462,20 @@ instruction_ref module::add_return(std::vector<instruction_ref> args) ...@@ -466,6 +462,20 @@ instruction_ref module::add_return(std::vector<instruction_ref> args)
return result; return result;
} }
instruction_ref module::insert_literal(instruction_ref ins, literal l)
{
impl->emplace(ins, std::move(l));
return std::prev(ins);
}
instruction_ref module::insert_parameter(instruction_ref ins, std::string name, shape s)
{
assert(get_parameter_shape(name) == shape{});
impl->insert(ins, {builtin::param{std::move(name), impl->nparams}, std::move(s), {}});
impl->nparams++;
return std::prev(ins);
}
instruction_ref module::replace_return(std::vector<instruction_ref> args) instruction_ref module::replace_return(std::vector<instruction_ref> args)
{ {
auto last = std::prev(this->end()); auto last = std::prev(this->end());
......
...@@ -504,12 +504,14 @@ static void mod_from_val(module_ref mod, ...@@ -504,12 +504,14 @@ static void mod_from_val(module_ref mod,
if(name == "@param") if(name == "@param")
{ {
output = mod->add_parameter(fields["parameter"].to<std::string>(), output = mod->insert_parameter(mod->end(),
migraphx::from_value<shape>(node.at("shape"))); fields["parameter"].to<std::string>(),
migraphx::from_value<shape>(node.at("shape")));
} }
else if(name == "@literal") else if(name == "@literal")
{ {
output = mod->add_literal(migraphx::from_value<literal>(node.at("literal"))); output =
mod->insert_literal(mod->end(), migraphx::from_value<literal>(node.at("literal")));
} }
else else
{ {
...@@ -544,11 +546,11 @@ static void mod_from_val(module_ref mod, ...@@ -544,11 +546,11 @@ static void mod_from_val(module_ref mod,
} }
else if(module_inputs.empty()) else if(module_inputs.empty())
{ {
output = mod->add_instruction(op, inputs); output = mod->insert_instruction(mod->end(), op, inputs);
} }
else else
{ {
output = mod->add_instruction(op, inputs, module_inputs); output = mod->insert_instruction(mod->end(), op, inputs, module_inputs);
} }
} }
output->set_normalized(normalized); output->set_normalized(normalized);
...@@ -681,11 +683,13 @@ void program::perf_report(std::ostream& os, ...@@ -681,11 +683,13 @@ void program::perf_report(std::ostream& os,
double overhead_percent = overhead_time * 100.0 / total_time; double overhead_percent = overhead_time * 100.0 / total_time;
double total_instruction_time = 0.0; double total_instruction_time = 0.0;
std::unordered_map<std::string, double> op_times; std::unordered_map<std::string, double> op_times;
std::unordered_map<std::string, std::size_t> op_n;
for(auto&& p : ins_vec) for(auto&& p : ins_vec)
{ {
double avg = common_average(p.second); double avg = common_average(p.second);
op_times[perf_group(p.first->get_operator())] += avg; op_times[perf_group(p.first->get_operator())] += avg;
total_instruction_time += avg; total_instruction_time += avg;
op_n[perf_group(p.first->get_operator())]++;
} }
double calculate_overhead_time = total_time - total_instruction_time; double calculate_overhead_time = total_time - total_instruction_time;
double calculate_overhead_percent = calculate_overhead_time * 100.0 / total_time; double calculate_overhead_percent = calculate_overhead_time * 100.0 / total_time;
...@@ -706,18 +710,19 @@ void program::perf_report(std::ostream& os, ...@@ -706,18 +710,19 @@ void program::perf_report(std::ostream& os,
os << std::endl; os << std::endl;
os << "Summary:" << std::endl; os << "Summary:" << std::endl;
std::vector<std::pair<double, std::string>> op_times_sorted; std::vector<std::tuple<double, std::size_t, std::string>> op_times_sorted;
std::transform(op_times.begin(), std::transform(
op_times.end(), op_times.begin(), op_times.end(), std::back_inserter(op_times_sorted), [&](auto p) {
std::back_inserter(op_times_sorted), auto&& name = p.first;
[](auto p) { return std::make_pair(p.second, p.first); }); return std::make_tuple(p.second, op_n.at(name), name);
});
std::sort(op_times_sorted.begin(), op_times_sorted.end(), std::greater<>{}); std::sort(op_times_sorted.begin(), op_times_sorted.end(), std::greater<>{});
for(auto&& p : op_times_sorted) for(auto&& [avg, nn, name] : op_times_sorted)
{ {
auto&& name = p.second;
double avg = p.first;
double percent = std::ceil(100.0 * avg / total_instruction_time); double percent = std::ceil(100.0 * avg / total_instruction_time);
os << name << ": " << avg << "ms, " << percent << "%" << std::endl; double per_ins = avg / nn;
os << name << ": " << avg << "ms / " << nn << " = " << per_ins << "ms, " << percent << "%"
<< std::endl;
} }
os << std::endl; os << std::endl;
......
...@@ -36,7 +36,7 @@ void raw_data_to_value(value& v, const RawData& rd) ...@@ -36,7 +36,7 @@ void raw_data_to_value(value& v, const RawData& rd)
result["shape"] = migraphx::to_value(rd.get_shape()); result["shape"] = migraphx::to_value(rd.get_shape());
if(rd.get_shape().type() == shape::tuple_type) if(rd.get_shape().type() == shape::tuple_type)
result["sub"] = migraphx::to_value(rd.get_sub_objects()); result["sub"] = migraphx::to_value(rd.get_sub_objects());
else else if(not rd.empty())
result["data"] = migraphx::value::binary(rd.data(), rd.get_shape().bytes()); result["data"] = migraphx::value::binary(rd.data(), rd.get_shape().bytes());
v = result; v = result;
} }
...@@ -56,7 +56,7 @@ void migraphx_from_value(const value& v, argument& a) ...@@ -56,7 +56,7 @@ void migraphx_from_value(const value& v, argument& a)
literal l = migraphx::from_value<literal>(v); literal l = migraphx::from_value<literal>(v);
a = l.get_argument(); a = l.get_argument();
} }
else else if(v.contains("sub"))
{ {
a = migraphx::from_value<std::vector<argument>>(v.at("sub")); a = migraphx::from_value<std::vector<argument>>(v.at("sub"));
} }
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <migraphx/module.hpp> #include <migraphx/module.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -52,6 +53,7 @@ struct cpu_literal ...@@ -52,6 +53,7 @@ struct cpu_literal
return os; return os;
} }
}; };
MIGRAPHX_REGISTER_OP(cpu_literal);
void write_literals::apply(module& m) const void write_literals::apply(module& m) const
{ {
......
...@@ -164,6 +164,7 @@ add_library(migraphx_gpu ...@@ -164,6 +164,7 @@ add_library(migraphx_gpu
deconvolution.cpp deconvolution.cpp
device_name.cpp device_name.cpp
elu.cpp elu.cpp
fuse_mlir.cpp
fuse_ops.cpp fuse_ops.cpp
gather.cpp gather.cpp
gemm_impl.cpp gemm_impl.cpp
...@@ -176,7 +177,7 @@ add_library(migraphx_gpu ...@@ -176,7 +177,7 @@ add_library(migraphx_gpu
loop.cpp loop.cpp
lrn.cpp lrn.cpp
leaky_relu.cpp leaky_relu.cpp
mlir_conv.cpp mlir.cpp
multinomial.cpp multinomial.cpp
nonzero.cpp nonzero.cpp
pack_args.cpp pack_args.cpp
...@@ -320,16 +321,26 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}") ...@@ -320,16 +321,26 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "") set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "")
if(MIGRAPHX_ENABLE_MLIR) if(MIGRAPHX_ENABLE_MLIR)
find_library(LIBMLIRMIOPEN MLIRMIOpenThin REQUIRED) find_library(MLIRAPI_LIBRARY MLIRMIOpen
PATH_SUFFIXES
# Workaournd broken mlir install
lib/ lib/lib)
# REQUIRED is not supported before cmake 3.18 # REQUIRED is not supported before cmake 3.18
if(NOT LIBMLIRMIOPEN) if(NOT MLIRAPI_LIBRARY)
message(FATAL_ERROR "libMLIRMIOpenThin not found") message(FATAL_ERROR "libMLIRMIOpen not found")
else() else()
message(STATUS "Build with libMLIRMIOpenThin: " ${LIBMLIRMIOPEN}) message(STATUS "Build with libMLIRMIOpen: " ${MLIRAPI_LIBRARY})
endif() endif()
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR_MIOPEN_SUPPORT") find_path(MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h)
target_link_libraries(migraphx_gpu PUBLIC ${LIBMLIRMIOPEN}) # Workaround MLIR broken installation
find_path(MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
PATH_SUFFIXES
include/external/include external/include)
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
target_include_directories(migraphx_gpu SYSTEM PRIVATE ${MLIRAPI_HEADERS} ${MLIRAPI_HEADERS2})
target_link_libraries(migraphx_gpu PUBLIC ${MLIRAPI_LIBRARY})
endif() endif()
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "") set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")
......
...@@ -51,8 +51,9 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& ...@@ -51,8 +51,9 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
std::vector<void*> kargs(args.size()); std::vector<void*> kargs(args.size());
std::transform( std::transform(
args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); }); args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); });
k.launch(ctx.get_stream().get(), global, local, std::move(kargs)); auto [start, stop] = ctx.get_perf_events();
return args.back(); k.launch(ctx.get_stream().get(), global, local, std::move(kargs), start, stop);
return args[get_output_arg(args.size())];
} }
void code_object_op::finalize(context&, const shape&, const std::vector<shape>&) void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
{ {
......
...@@ -59,31 +59,30 @@ argument miopen_deconvolution::compute(context& ctx, ...@@ -59,31 +59,30 @@ argument miopen_deconvolution::compute(context& ctx,
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape())); auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
auto y_desc = make_tensor(reshape_if_1d(output_shape)); auto y_desc = make_tensor(reshape_if_1d(output_shape));
float alpha = 1; if(solution_id == 0)
float beta = 0; MIGRAPHX_THROW("MIOpen Deconvolution: invalid solution ID");
auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha, auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
x_desc.get(), w_desc.get(),
args[0].implicit(), args[1].implicit(),
w_desc.get(), x_desc.get(),
args[1].implicit(), args[0].implicit(),
cd.get(), cd.get(),
algo, y_desc.get(),
&beta, args[3].implicit(),
y_desc.get(), args[2].implicit(),
args[3].implicit(), args[2].get_shape().bytes(),
args[2].implicit(), solution_id);
args[2].get_shape().bytes());
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Running deconvolution failed"); MIGRAPHX_THROW("MIOpen Deconvolution: running convolution failed");
return args[3]; return args[3];
} }
shape miopen_deconvolution::compile(context& ctx, shape miopen_deconvolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
const shape& output_shape,
std::vector<shape> inputs)
{ {
shape workspace_shape{}; shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0])); auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1])); auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape)); auto y_desc = make_tensor(reshape_if_1d(output_shape));
...@@ -119,9 +118,35 @@ shape miopen_deconvolution::compile(context& ctx, ...@@ -119,9 +118,35 @@ shape miopen_deconvolution::compile(context& ctx,
workspace_size, workspace_size,
false); false);
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Find deconvolution failed"); MIGRAPHX_THROW("MIOpen Deconvolution: find convolution failed");
handle = ctx.get_stream().get_miopen(); algo = perf.fwd_algo;
algo = perf.fwd_algo;
size_t solution_count;
status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&solution_count);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: get solution count failed");
std::vector<miopenConvSolution_t> solutions(solution_count);
status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_count,
&solution_count,
solutions.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: get solution failed");
solution_id = solutions.front().solution_id;
return shape{shape::int8_type, {perf.memory}}; return shape{shape::int8_type, {perf.memory}};
} }
...@@ -129,13 +154,29 @@ void miopen_deconvolution::finalize(context& ctx, ...@@ -129,13 +154,29 @@ void miopen_deconvolution::finalize(context& ctx,
const shape& output_shape, const shape& output_shape,
std::vector<shape> inputs) std::vector<shape> inputs)
{ {
if(handle == ctx.get_stream().get_miopen()) if(cd == nullptr)
return; cd = make_deconv(op);
// Check that workspace hasn't changed if(solution_id == 0)
auto size = inputs.at(2).bytes(); {
auto ws = compile(ctx, output_shape, std::move(inputs)); // Check that workspace hasn't changed
if(ws.bytes() > size) auto size = inputs.at(2).bytes();
MIGRAPHX_THROW("Workspace has changed during finalization."); auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen Deconvolution: workspace has changed during finalization.");
}
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: compile solution failed");
} }
} // namespace gpu } // namespace gpu
......
...@@ -38,8 +38,11 @@ struct compile_op : action<compile_op> ...@@ -38,8 +38,11 @@ struct compile_op : action<compile_op>
context ctx; context ctx;
auto inputs = p.parse_shapes(v.at("inputs")); auto inputs = p.parse_shapes(v.at("inputs"));
auto op = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v); auto op = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v);
double t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms" << std::endl; std::cout << op << ": " << host_time << "ms";
if(device_time > 0)
std::cout << ", " << device_time << "ms";
std::cout << std::endl;
} }
}; };
......
...@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace driver { namespace driver {
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n = 100); std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace driver } // namespace driver
} // namespace gpu } // namespace gpu
......
...@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig ...@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
} }
using milliseconds = std::chrono::duration<double, std::milli>; using milliseconds = std::chrono::duration<double, std::milli>;
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n) std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
{ {
// TODO: Use std::ref // TODO: Use std::ref
migraphx::context gctx = ctx; migraphx::context ctx = ictx;
auto output = op.compute_shape(inputs); auto& gctx = any_cast<migraphx::gpu::context>(ctx);
op.finalize(gctx, output, inputs); auto output = op.compute_shape(inputs);
op.finalize(ctx, output, inputs);
auto args = generate_arguments(inputs); auto args = generate_arguments(inputs);
auto run = [&] { auto run = [&] {
op.compute(gctx, output, args); op.compute(ctx, output, args);
gctx.finish(); ctx.finish();
}; };
gctx.enable_perf_measurement();
run(); run();
auto r = range(n); double host_time = 0.0;
double t = std::accumulate( double device_time = 0.0;
r.begin(), r.end(), double{0.0}, [&](auto x, auto) { return x + time<milliseconds>(run); }); for(auto i : range(n))
return t / n; {
(void)i;
host_time += time<milliseconds>(run);
device_time += gctx.get_elapsed_ms();
}
return std::make_pair(host_time / n, device_time / n);
} }
} // namespace driver } // namespace driver
......
...@@ -43,8 +43,8 @@ struct run_op : action<run_op> ...@@ -43,8 +43,8 @@ struct run_op : action<run_op>
auto op = make_op(name); auto op = make_op(name);
if(v.contains("fields")) if(v.contains("fields"))
op.from_value(v.at("fields")); op.from_value(v.at("fields"));
double t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms" << std::endl; std::cout << op << ": " << host_time << "ms" << std::endl;
} }
}; };
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
#ifdef MIGRAPHX_MLIR
struct mlir_conv
{
operation op = make_op("convolution");
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"));
}
std::string name() const { return "gpu::mlir_conv"; }
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
check_shapes{inputs, *this}.standard();
if(mods.size() != 1)
MIGRAPHX_THROW("should have one submodule.");
if(inputs.size() < 2)
MIGRAPHX_THROW("should have at least two inputs.");
auto n = inputs.size();
return op.compute_shape({inputs[n - 2], inputs[n - 1]});
}
};
MIGRAPHX_REGISTER_OP(mlir_conv);
namespace {
struct find_conv_pointwise
{
// Find a convolution followed by a pointwise operation.
auto matcher() const
{
auto convolution =
match::skip(match::name("contiguous"))(match::name("convolution").bind("convolution"));
return match::name("pointwise")(match::any_of[match::inputs()](convolution.bind("x")));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto ins = r.result;
auto conv_ins = r.instructions["convolution"];
auto x_ins = r.instructions["x"]; // input after contiguous
auto* pm = ins->module_inputs().front();
auto names = pm->get_parameter_names();
// Whitelist pointwise operators
if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
return not contains({"@literal", "@param", "@return", "convolution", "add", "relu"},
i.name());
}))
return;
// Only fuse with fp32 for now
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
return i->get_shape().type() != shape::type_t::float_type;
}))
return;
std::sort(names.begin(), names.end());
module_ref mm = mpm.create_module("mlir_" + pm->name());
mm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> param_map;
auto x = mm->add_parameter("x" + std::to_string(names.size()),
conv_ins->inputs().at(0)->get_shape());
auto w = mm->add_parameter("x" + std::to_string(names.size() + 1),
conv_ins->inputs().at(1)->get_shape());
auto conv = mm->add_instruction(conv_ins->get_operator(), {x, w});
std::transform(names.begin(),
names.end(),
ins->inputs().begin(),
std::inserter(param_map, param_map.end()),
[&](auto name, auto input) {
if(input == x_ins)
return std::make_pair(pm->get_parameter(name), conv);
return std::make_pair(pm->get_parameter(name),
mm->add_parameter(name, input->get_shape()));
});
mm->add_return(mm->insert_instructions(mm->end(), pm, param_map));
std::vector<instruction_ref> inputs;
std::copy_if(ins->inputs().begin(),
ins->inputs().end(),
std::back_inserter(inputs),
[&](auto input) { return input != conv_ins; });
inputs.insert(inputs.end(), conv_ins->inputs().begin(), conv_ins->inputs().end());
mpm.get_module().replace_instruction(
ins, mlir_conv{conv_ins->get_operator()}, inputs, {mm});
}
};
} // namespace
#endif
void fuse_mlir::apply(module_pass_manager& mpm) const
{
#ifdef MIGRAPHX_MLIR
match::find_matches(mpm, find_conv_pointwise{});
#else
(void)mpm;
#endif
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -342,6 +342,7 @@ void move_standard_front(std::vector<instruction_ref>& args) ...@@ -342,6 +342,7 @@ void move_standard_front(std::vector<instruction_ref>& args)
auto gpu_name(const std::string& s) { return match::name("gpu::" + s); } auto gpu_name(const std::string& s) { return match::name("gpu::" + s); }
namespace {
struct find_layernorm struct find_layernorm
{ {
auto matcher() const { return match::layernorm(&gpu_name); } auto matcher() const { return match::layernorm(&gpu_name); }
...@@ -843,15 +844,6 @@ inline auto precompile_name(Strings... names) // NOLINT ...@@ -843,15 +844,6 @@ inline auto precompile_name(Strings... names) // NOLINT
}); });
} }
template <class... Ms>
auto conv_bias_pointwise(Ms... ms)
{
return precompile_name("pointwise")(
match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"),
fusable_conv(match::used_once()).bind("conv")),
ms...);
}
struct find_conv_bias struct find_conv_bias
{ {
context* ctx = nullptr; context* ctx = nullptr;
...@@ -1087,6 +1079,7 @@ struct find_commutative_broadcast ...@@ -1087,6 +1079,7 @@ struct find_commutative_broadcast
m.replace_instruction(ins, ins->get_operator(), args); m.replace_instruction(ins, ins->get_operator(), args);
} }
}; };
} // namespace
struct find_contiguous struct find_contiguous
{ {
......
...@@ -38,12 +38,13 @@ struct context; ...@@ -38,12 +38,13 @@ struct context;
struct code_object_op struct code_object_op
{ {
value::binary code_object; value::binary code_object{};
std::string symbol_name; std::string symbol_name = "";
std::size_t global; std::size_t global = 0;
std::size_t local; std::size_t local = 0;
std::vector<shape> expected_inputs; std::vector<shape> expected_inputs{};
shape output; shape output{};
std::int64_t output_arg = -1;
kernel k{}; kernel k{};
template <class Self, class F> template <class Self, class F>
...@@ -66,9 +67,13 @@ struct code_object_op ...@@ -66,9 +67,13 @@ struct code_object_op
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
void finalize(context&, const shape&, const std::vector<shape>&); void finalize(context&, const shape&, const std::vector<shape>&);
std::int64_t get_output_arg(std::size_t n) const
{
return output_arg < 0 ? n + output_arg : output_arg;
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{ {
return shapes.size() - 1; return get_output_arg(shapes.size());
} }
friend std::ostream& operator<<(std::ostream& os, const code_object_op& op) friend std::ostream& operator<<(std::ostream& os, const code_object_op& op)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment