#include #include #include #include #include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { struct module; namespace gpu { struct ck_gemm { operation op = make_op("dot"); template static auto reflect(Self& self, F f) { return pack(f(self.op, "op")); } std::string name() const { return "gpu::ck_gemm"; } void check_gemm_shape(const shape& s) const { if(contains(s.lens(), 1)) MIGRAPHX_THROW("Invalid shape for ck_gemm"); } shape compute_shape(std::vector inputs, const std::vector& mods) const { check_shapes{inputs, *this}.same_ndims(); // if(mods.size() != 1) // MIGRAPHX_THROW("should have one submodule."); if(inputs.size() < 2) MIGRAPHX_THROW("should have at least two inputs."); auto a = inputs[0]; auto b = inputs[1]; for(const auto& input : inputs) check_gemm_shape(input); return op.compute_shape({a, b}); } }; MIGRAPHX_REGISTER_OP(ck_gemm); namespace { MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins) { if(ins->name() != "dot") return false; auto a = ins->inputs().front()->get_shape(); auto b = ins->inputs().back()->get_shape(); if(a.lens().size() > 2 or b.lens().size() > 2) return false; if(a.lens()[1] > 2048) return false; return true; } struct find_ck_gemm { // Find a gemm followed by a pointwise operation. auto matcher() const { auto gemm = match::skip(match::name("contiguous"))(match::name("dot")(is_ck_gemm().bind("gemm"))); return match::name("pointwise")(match::any_of[match::inputs()](gemm.bind("x"))); } void apply(module_pass_manager& mpm, const match::matcher_result& r) const { auto ins = r.result; auto gemm_ins = r.instructions["gemm"]; auto x_ins = r.instructions["x"]; // input after contiguous auto* pm = ins->module_inputs().front(); auto names = pm->get_parameter_names(); std::sort(names.begin(), names.end()); auto inputs = ins->inputs(); auto gemm_it = std::find(inputs.begin(), inputs.end(), x_ins); auto gemm_idx = gemm_it - inputs.begin(); assert(gemm_it != inputs.end()); if(gemm_idx != 0) { auto first_param = pm->get_parameter(names[0]); auto gemm_param = pm->get_parameter(names[gemm_idx]); auto new_gemm_param = pm->add_parameter(names[0] + ".0", gemm_param->get_shape()); auto new_first_param = pm->add_parameter(names[gemm_idx] + ".0", first_param->get_shape()); pm->replace_instruction(gemm_param, new_gemm_param); pm->replace_instruction(first_param, new_first_param); pm->remove_instruction(first_param); pm->remove_instruction(gemm_param); } inputs.erase(gemm_it); inputs.insert(inputs.begin(), gemm_ins->inputs().begin(), gemm_ins->inputs().end()); mpm.get_module().replace_instruction(ins, ck_gemm{}, inputs, {pm}); } }; } // namespace void fuse_ck::apply(module_pass_manager& mpm) const { match::find_matches(mpm, find_ck_gemm{}); } } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx