Commit cd4ab535 authored by Khalique Ahmed

manual merge

parents 3891ee58 a0fa3742
......@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/gpu/time_op.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
......
......@@ -44,7 +44,7 @@ struct auto_register_action
template <class T>
static void apply()
{
auto name = get_type_name<T>();
const auto& name = get_type_name<T>();
register_action(name.substr(name.rfind("::") + 2),
[](auto&&... xs) { T::apply(std::forward<decltype(xs)>(xs)...); });
}
......
......@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/gpu/time_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/make_op.hpp>
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/fuse_ck.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
struct ck_gemm
{
operation op = make_op("dot");
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"));
}
std::string name() const { return "gpu::ck_gemm"; }
void check_gemm_shape(const shape& s) const
{
if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1))
MIGRAPHX_THROW("Invalid shape for ck_gemm");
}
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
check_shapes{inputs, *this}.same_ndims();
if(inputs.size() < 2)
MIGRAPHX_THROW("should have at least two inputs.");
auto a = inputs[0];
auto b = inputs[1];
for(const auto& input : inputs)
check_gemm_shape(input);
auto r = op.compute_shape({a, b});
if(mods.empty())
return r;
return r.with_type(mods.front()->get_output_shapes().front().type());
}
};
MIGRAPHX_REGISTER_OP(ck_gemm);
namespace {
bool is_ck_supported_type(shape::type_t t)
{
return contains({shape::half_type, shape::int8_type, shape::int32_type}, t);
}
MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
{
if(ins->name() != "dot" and ins->name() != "quant_dot")
return false;
if(not is_ck_supported_type(ins->get_shape().type()))
return false;
auto a = ins->inputs().front()->get_shape();
auto b = ins->inputs().back()->get_shape();
// Skipping GEMMs with a K dimension greater than 2048 is a coarse-grained strategy
// to avoid poor-performing GEMM kernels from CK
// To-do: Investigate a more precise strategy
return a.lens().back() <= 2048;
}
struct find_ck_gemm_pointwise
{
// Find a gemm followed by a pointwise operation.
auto matcher() const
{
auto gemm = match::skip(match::name("contiguous"))(
match::name("dot", "quant_dot")(is_ck_gemm().bind("gemm")));
return match::name("pointwise")(match::any_of[match::inputs()](gemm.bind("x")));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto ins = r.result;
auto gemm_ins = r.instructions["gemm"];
auto x_ins = r.instructions["x"]; // input after contiguous
auto* pm = ins->module_inputs().front();
auto names = pm->get_parameter_names();
std::sort(names.begin(), names.end());
auto inputs = ins->inputs();
auto gemm_it = std::find(inputs.begin(), inputs.end(), x_ins);
auto gemm_idx = gemm_it - inputs.begin();
if(gemm_ins->get_shape().type() != shape::int32_type and
ins->get_shape().type() != gemm_ins->get_shape().type())
return;
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) {
return not is_ck_supported_type(input->get_shape().type());
}))
return;
assert(gemm_it != inputs.end());
if(gemm_idx != 0)
{
auto first_param = pm->get_parameter(names[0]);
auto gemm_param = pm->get_parameter(names[gemm_idx]);
auto new_gemm_param = pm->add_parameter(names[0] + "_0", gemm_param->get_shape());
auto new_first_param =
pm->add_parameter(names[gemm_idx] + "_0", first_param->get_shape());
pm->replace_instruction(gemm_param, new_gemm_param);
pm->replace_instruction(first_param, new_first_param);
pm->remove_instruction(first_param);
pm->remove_instruction(gemm_param);
}
inputs.erase(gemm_it);
inputs.insert(inputs.begin(), gemm_ins->inputs().begin(), gemm_ins->inputs().end());
mpm.get_module().replace_instruction(ins, ck_gemm{gemm_ins->get_operator()}, inputs, {pm});
}
};
struct find_ck_gemm
{
auto matcher() const { return match::name("dot")(is_ck_gemm().bind("gemm")); }
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto ins = r.result;
mpm.get_module().replace_instruction(ins, ck_gemm{ins->get_operator()}, ins->inputs());
}
};
} // namespace
void fuse_ck::apply(module_pass_manager& mpm) const
{
match::find_matches(mpm, find_ck_gemm_pointwise{});
match::find_matches(mpm, find_ck_gemm{});
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
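
The new gpu::fuse_ck pass above is a regular module pass: find_ck_gemm_pointwise folds a pointwise module into ck_gemm, and find_ck_gemm catches the remaining bare dots. A minimal sketch of scheduling it, assuming the run_passes helper from pass_manager.hpp and an already initialized GPU context; nothing here beyond the pass itself is taken from this commit.

#include <migraphx/gpu/fuse_ck.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/program.hpp>

// Hypothetical pipeline fragment: run CK fusion after pointwise modules
// have been formed so find_ck_gemm_pointwise can see them.
void run_ck_fusion(migraphx::program& p, migraphx::gpu::context& ctx)
{
    migraphx::run_passes(p, {migraphx::gpu::fuse_ck{&ctx}});
}
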
......@@ -38,9 +38,32 @@ namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_MLIR);
bool mlir_enabled()
{
#ifdef MIGRAPHX_MLIR
struct mlir_conv
const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
if(mlir_enabled)
{
return true;
}
else
{
std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
"var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
<< std::endl;
return false;
}
#else
return false;
#endif
}
#ifdef MIGRAPHX_MLIR
struct mlir_op
{
std::string name() const { return "gpu::mlir_op"; }
operation op = make_op("convolution");
template <class Self, class F>
......@@ -49,7 +72,6 @@ struct mlir_conv
return pack(f(self.op, "op"));
}
std::string name() const { return "gpu::mlir_conv"; }
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
check_shapes{inputs, *this}.packed_or_broadcasted();
......@@ -57,17 +79,50 @@ struct mlir_conv
MIGRAPHX_THROW("should have one submodule.");
if(inputs.size() < 2)
MIGRAPHX_THROW("should have at least two inputs.");
auto n = inputs.size();
return op.compute_shape({inputs[n - 2], inputs[n - 1]});
module_ref mod = mods[0];
auto type = mod->get_output_shapes().front().type();
std::unordered_map<instruction_ref, shape> ins_shapes;
size_t param_cnt = 0;
std::vector<std::string> names = mod->get_parameter_names();
std::sort(names.begin(), names.end());
for(std::string param_name : names)
{
ins_shapes[mod->get_parameter(param_name)] = inputs[param_cnt++];
}
for(auto ins : iterator_for(*mod))
{
if(ins->name() == "@param")
{
continue;
}
if(ins->name() == "@literal")
{
ins_shapes[ins] = ins->get_shape();
continue;
}
if(ins->name() == "@return")
{
return ins_shapes[ins->inputs().at(0)].with_type(type);
}
std::vector<shape> input_shapes;
input_shapes.resize(ins->inputs().size());
std::transform(ins->inputs().begin(),
ins->inputs().end(),
input_shapes.begin(),
[&](auto in) { return ins_shapes[in]; });
ins_shapes[ins] = ins->get_operator().compute_shape(input_shapes);
}
MIGRAPHX_THROW("No return found in the submodule");
}
};
MIGRAPHX_REGISTER_OP(mlir_conv);
MIGRAPHX_REGISTER_OP(mlir_op);
namespace {
MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
{
if(ins->name() != "convolution")
if(ins->name() != "convolution" and ins->name() != "quant_convolution")
return false;
value v = ins->get_operator().to_value();
auto group = v.at("group").to<int>();
......@@ -79,51 +134,107 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
return true;
}
struct find_conv_pointwise
struct find_mlir_op
{
// Find a convolution followed by a pointwise operation.
auto matcher() const
{
auto convolution =
match::skip(match::name("contiguous"))(is_mlir_conv().bind("convolution"));
return match::name("pointwise")(match::any_of[match::inputs()](convolution.bind("x")));
auto dot_or_conv = match::skip(match::name("contiguous"))(
match::any_of(match::name("dot"), is_mlir_conv()).bind("gemm_based_op"));
return match::name("pointwise")(match::any_of[match::inputs()](dot_or_conv.bind("x")));
}
std::unordered_map<instruction_ref, instruction_ref>
create_param_map_with_literals(module_ref mm, const module* pm, const shape& shape) const
{
std::unordered_map<instruction_ref, instruction_ref> ins_map;
for(auto ins : iterator_for(*pm))
{
if(ins->name() != "@literal")
{
continue;
}
literal r = ins->get_literal();
instruction_ref literal = mm->add_literal(r);
instruction_ref mbcast = mm->add_instruction(
make_op("multibroadcast", {{"out_lens", shape.lens()}}), literal);
ins_map[ins] = mbcast;
}
return ins_map;
}
std::tuple<instruction_ref, std::vector<instruction_ref>>
fuse_input_ops_and_gemm_based_op(module_ref mm, instruction_ref gemm_based_op) const
{
std::vector<instruction_ref> top_inputs;
std::vector<instruction_ref> imm_inputs;
size_t input_cnt = 0;
for(instruction_ref input : gemm_based_op->inputs())
{
std::vector<operation> op_stream;
while(contains({"slice", "transpose", "contiguous", "reshape"}, input->name()))
{
op_stream.push_back(input->get_operator());
input = input->inputs().at(0);
}
top_inputs.push_back(input);
instruction_ref prev_input =
mm->add_parameter("y" + std::to_string(input_cnt++), input->get_shape());
for(const auto& op : reverse(op_stream))
{
prev_input = mm->add_instruction(op, {prev_input});
}
imm_inputs.push_back(prev_input);
}
instruction_ref new_gemm_based_op =
mm->add_instruction(gemm_based_op->get_operator(), imm_inputs);
return {new_gemm_based_op, top_inputs};
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto ins = r.result;
auto conv_ins = r.instructions["convolution"];
auto x_ins = r.instructions["x"]; // input after contiguous
auto* pm = ins->module_inputs().front();
auto names = pm->get_parameter_names();
auto ins = r.result;
auto gemm_based_op = r.instructions["gemm_based_op"];
auto x_ins = r.instructions["x"]; // input after contiguous
auto* pm = ins->module_inputs().front();
auto names = pm->get_parameter_names();
// Whitelist pointwise operators
if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
return not contains({"@literal", "@param", "@return", "convolution", "add", "relu"},
return not contains({"@literal",
"@param",
"@return",
"convolution",
"quant_convolution",
"dot",
"add",
"relu",
"dequantizelinear",
"quantizelinear",
"mul"},
i.name());
}))
return;
// Only fuse with fp32/fp16
// Only fuse with fp32/fp16/int8/int32
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
return not contains({shape::type_t::float_type, shape::type_t::half_type},
return not contains({shape::type_t::float_type,
shape::type_t::half_type,
shape::type_t::int8_type,
shape::type_t::int32_type},
i->get_shape().type());
}))
return;
std::sort(names.begin(), names.end());
module_ref mm = mpm.create_module("mlir_" + pm->name());
mm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> param_map;
auto x = mm->add_parameter("x" + std::to_string(names.size()),
conv_ins->inputs().at(0)->get_shape());
auto w = mm->add_parameter("x" + std::to_string(names.size() + 1),
conv_ins->inputs().at(1)->get_shape());
auto conv = mm->add_instruction(conv_ins->get_operator(), {x, w});
std::unordered_map<instruction_ref, instruction_ref> param_map =
create_param_map_with_literals(mm, pm, gemm_based_op->get_shape());
auto [anchor_op, top_inputs] = fuse_input_ops_and_gemm_based_op(mm, gemm_based_op);
std::transform(names.begin(),
names.end(),
ins->inputs().begin(),
std::inserter(param_map, param_map.end()),
[&](auto name, auto input) {
[&, &anchor_op = anchor_op](auto name, auto input) {
if(input == x_ins)
return std::make_pair(pm->get_parameter(name), conv);
return std::make_pair(pm->get_parameter(name), anchor_op);
return std::make_pair(pm->get_parameter(name),
mm->add_parameter(name, input->get_shape()));
});
......@@ -133,12 +244,13 @@ struct find_conv_pointwise
std::copy_if(ins->inputs().begin(),
ins->inputs().end(),
std::back_inserter(inputs),
[&](auto input) { return input != conv_ins; });
inputs.insert(inputs.end(), conv_ins->inputs().begin(), conv_ins->inputs().end());
[&](auto input) { return input != gemm_based_op; });
inputs.insert(inputs.end(), top_inputs.begin(), top_inputs.end());
mpm.get_module().replace_instruction(
ins, mlir_conv{conv_ins->get_operator()}, inputs, {mm});
ins, mlir_op{gemm_based_op->get_operator()}, inputs, {mm});
}
};
} // namespace
#endif
......@@ -146,17 +258,7 @@ struct find_conv_pointwise
void fuse_mlir::apply(module_pass_manager& mpm) const
{
#ifdef MIGRAPHX_MLIR
const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
if(mlir_enabled)
{
match::find_matches(mpm, find_conv_pointwise{});
}
else
{
std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
"var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
<< std::endl;
}
match::find_matches(mpm, find_mlir_op{});
#else
(void)mpm;
#endif
......
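
The mlir_enabled() check above now lives outside the pass, so the decision to schedule fuse_mlir can be made once when the pass list is built. A rough, hedged sketch of that gating; the surrounding pipeline function is illustrative, not part of this diff.

#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/pass.hpp>
#include <vector>

// Hypothetical fragment of a target pipeline: schedule the MLIR fusion pass
// only when MIGraphX was built with MLIR and MIGRAPHX_ENABLE_MLIR is set.
std::vector<migraphx::pass> mlir_passes(migraphx::gpu::context& ctx)
{
    std::vector<migraphx::pass> passes;
    if(migraphx::gpu::mlir_enabled())
        passes.push_back(migraphx::gpu::fuse_mlir{&ctx});
    return passes;
}
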
......@@ -165,7 +165,8 @@ struct fusion
const std::unordered_set<std::string>& get_supported_archs()
{
static std::unordered_set<std::string> supported_archs{"gfx900", "gfx906", "gfx908", "gfx1030"};
static std::unordered_set<std::string> supported_archs{
"gfx900", "gfx906", "gfx908", "gfx1030", "gfx940"};
return supported_archs;
}
......
......@@ -140,13 +140,8 @@ void gemm_impl(context& ctx,
compute_type = rocblas_datatype_f32_r;
}
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags flag =
int8_x4_format ? rocblas_gemm_flags_pack_int8x4 : rocblas_gemm_flags_none;
#else
(void)int8_x4_format;
int flag = 0;
#endif
auto a_lens = args[0].get_shape().lens();
auto b_lens = args[1].get_shape().lens();
......
......@@ -146,7 +146,11 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
gpu_sync();
std::vector<T> result(sz);
assert(not is_device_ptr(result.data()));
assert(is_device_ptr(x));
if(not is_device_ptr(x))
{
MIGRAPHX_THROW(
"read_from_gpu() requires Src buffer to be on the GPU, Copy from gpu failed\n");
}
auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
if(status != hipSuccess)
MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
......@@ -189,8 +193,20 @@ argument register_on_gpu(const argument& arg)
argument to_gpu(const argument& arg, bool host)
{
auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
return {arg.get_shape(), p};
argument result;
arg.visit(
[&](auto x) {
auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
result = {x.get_shape(), p};
},
[&](const auto& xs) {
std::vector<argument> args;
std::transform(xs.begin(), xs.end(), std::back_inserter(args), [&](auto x) {
return to_gpu(x, host);
});
result = argument{args};
});
return result;
}
argument from_gpu(const argument& arg)
......
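
to_gpu above now visits tuple arguments and copies each element separately, so nested arguments survive the trip to the device. A small hedged round-trip sketch; generate_argument and the shapes are just illustrative inputs.

#include <migraphx/argument.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/gpu/hip.hpp>

// Hypothetical round trip: a tuple argument is copied to the GPU element by
// element by the new to_gpu and brought back with from_gpu.
migraphx::argument roundtrip_tuple()
{
    auto a   = migraphx::generate_argument({migraphx::shape::float_type, {4, 4}});
    auto b   = migraphx::generate_argument({migraphx::shape::half_type, {8}});
    auto tup = migraphx::argument({a, b});  // tuple of two sub-arguments
    auto dev = migraphx::gpu::to_gpu(tup);  // recurses per element
    return migraphx::gpu::from_gpu(dev);    // back on the host
}
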
......@@ -21,71 +21,13 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
project(migraphx-doc)
find_package(ROCM REQUIRED)
include(ROCMDoxygenDoc)
set(DOXYGEN_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doxygen)
rocm_add_doxygen_doc(
OUTPUT_DIRECTORY ${DOXYGEN_OUTPUT}
INPUT
${CMAKE_SOURCE_DIR}/src
INCLUDE_PATH
${CMAKE_SOURCE_DIR}/src/include
${CMAKE_SOURCE_DIR}/src/targets/cpu/include
${CMAKE_SOURCE_DIR}/src/targets/gpu/include
STRIP_FROM_INC_PATH
${CMAKE_SOURCE_DIR}/src/include
${CMAKE_SOURCE_DIR}/src/targets/cpu/include
${CMAKE_SOURCE_DIR}/src/targets/gpu/include
EXCLUDE_PATTERNS
${CMAKE_SOURCE_DIR}/src/targets/gpu/kernels
${CMAKE_SOURCE_DIR}/src/targets/gpu/device
SEARCH_INCLUDES YES
MACRO_EXPANSION YES
RECURSIVE YES
GENERATE_XML YES
GENERATE_LATEX YES
USE_PDFLATEX YES
CALL_GRAPH YES
CALLER_GRAPH YES
BUILTIN_STL_SUPPORT YES
PROJECT_NAME MIGraphX
SORT_MEMBERS_CTORS_1ST YES
SOURCE_BROWSER YES
GENERATE_TREEVIEW YES
REFERENCED_BY_RELATION YES
REFERENCES_RELATION YES
REFERENCES_LINK_SOURCE YES
EXTRACT_ALL YES
ENUM_VALUES_PER_LINE 1
FULL_PATH_NAMES YES
WARN_LOGFILE "${DOXYGEN_OUTPUT}/DoxygenWarningLog.txt"
PREDEFINED DOXYGEN
add_executable(migraphx-hiprtc-driver
main.cpp
)
include(ROCMSphinxDoc)
rocm_add_sphinx_doc(src
BUILDER html
OUTPUT_DIR html
VARS
breathe_projects.proj=${DOXYGEN_OUTPUT}/xml
breathe_default_project=proj
DEPENDS doxygen
rocm_clang_tidy_check(migraphx-hiprtc-driver)
target_link_libraries(migraphx-hiprtc-driver PRIVATE migraphx_gpu)
add_dependencies(migraphx_all_targets migraphx-hiprtc-driver)
rocm_install_targets(
TARGETS migraphx-hiprtc-driver
)
find_package(LATEX)
if(LATEX_FOUND)
rocm_add_sphinx_doc(src
BUILDER latex
OUTPUT_DIR pdf
VARS
breathe_projects.proj=${DOXYGEN_OUTPUT}/xml
breathe_default_project=proj
DEPENDS doxygen
)
else()
message("Latex builder not found. Latex builder is required only for building the PDF documentation for MIGraphX and is not necessary for building the library, or any other components. To build PDF documentation run make in ${CMAKE_CURRENT_SOURCE_DIR}/pdf, once a latex builder is installed.")
endif()
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/value.hpp>
#include <migraphx/msgpack.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/ranges.hpp>
#include <iostream>
#include <cstring>
std::vector<char> read_stdin()
{
std::vector<char> result;
std::array<char, 1024> buffer;
std::size_t len = 0;
while((len = std::fread(buffer.data(), 1, buffer.size(), stdin)) > 0)
{
if(std::ferror(stdin) != 0 and std::feof(stdin) == 0)
MIGRAPHX_THROW(std::strerror(errno));
result.insert(result.end(), buffer.data(), buffer.data() + len);
}
return result;
}
int main(int argc, char const* argv[])
{
if(argc < 2 or migraphx::contains({"-h", "--help", "-v", "--version"}, std::string(argv[1])))
{
std::cout << "USAGE:" << std::endl;
std::cout << " ";
std::cout << "Used internally by migraphx to compile hip programs out-of-process."
<< std::endl;
std::exit(0);
}
std::string output_name = argv[1];
auto v = migraphx::from_msgpack(read_stdin());
std::vector<migraphx::gpu::hiprtc_src_file> srcs;
migraphx::from_value(v.at("srcs"), srcs);
auto out = migraphx::gpu::compile_hip_src_with_hiprtc(
std::move(srcs), v.at("params").to<std::string>(), v.at("arch").to<std::string>());
if(not out.empty())
migraphx::write_buffer(output_name, out.front());
}
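
The migraphx-hiprtc-driver above reads a msgpack-encoded value from stdin with srcs, params, and arch entries and writes the first compiled code object to the path given as argv[1]. A hedged sketch of what the sending side might serialize; the flags and architecture string are placeholders, not values from this commit.

#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/msgpack.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/value.hpp>
#include <string>
#include <vector>

// Hypothetical: build the stdin payload the out-of-process driver expects.
std::vector<char> make_driver_payload(const std::vector<migraphx::gpu::hiprtc_src_file>& srcs)
{
    migraphx::value v;
    v["srcs"]   = migraphx::to_value(srcs);
    v["params"] = std::string{"-O3"};    // compiler flags: placeholder
    v["arch"]   = std::string{"gfx908"}; // target architecture: placeholder
    return migraphx::to_msgpack(v);
}
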
......@@ -26,6 +26,7 @@
#include <migraphx/config.hpp>
#include <migraphx/module_ref.hpp>
#include <migraphx/instruction_ref.hpp>
#include <string>
#include <unordered_map>
#include <vector>
......@@ -34,6 +35,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct shape;
struct operation;
namespace gpu {
......@@ -72,8 +74,23 @@ std::string make_transformer_args(Ts... xs)
std::string generate_pointwise(const module& pm, const std::string& name);
std::string generate_reduce(const module& m, const std::string& name);
std::string generate_name_from_ops(const module& m);
struct reduce_op
{
std::string input = "";
std::string reduction = "";
std::string init = "0";
std::string read = "op::id{}";
std::string write = "op::id{}";
void set(instruction_ref ins, const operation& op);
std::string str() const;
static std::string generate(instruction_ref ins, const std::string& x);
};
} // namespace gen
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -27,6 +27,8 @@
#include <migraphx/config.hpp>
#include <migraphx/filesystem.hpp>
#include <migraphx/compile_src.hpp>
#include <migraphx/env.hpp>
#include <migraphx/functional.hpp>
#include <string>
#include <utility>
#include <vector>
......@@ -35,6 +37,31 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
#ifdef MIGRAPHX_USE_HIPRTC
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS);
#endif
struct hiprtc_src_file
{
hiprtc_src_file() = default;
hiprtc_src_file(const src_file& s)
: path(s.path.string()), content(s.content.first, s.content.second)
{
}
std::string path;
std::string content;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.path, "path"), f(self.content, "content"));
}
};
std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs,
std::string params,
const std::string& arch);
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
......
......@@ -26,6 +26,7 @@
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/compile_src.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -39,9 +40,10 @@ struct hip_compile_options
std::size_t local;
std::vector<shape> inputs;
shape output;
std::string kernel_name = "kernel";
std::string params = "";
std::vector<shape> virtual_inputs = {};
std::string kernel_name = "kernel";
std::string params = "";
std::vector<shape> virtual_inputs = {};
std::vector<src_file> additional_src_files = {};
/**
* @brief Set the launch parameters but allow v to override the values
......@@ -71,6 +73,8 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size = 1024);
std::string generate_make_shape(const shape& s);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
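
hip_compile_options above gains additional_src_files next to the existing launch and shape fields. A rough sketch of filling the options and handing a kernel source string to compile_hip_code_object; the header path, launch sizes, and kernel body are assumptions for illustration only.

#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <string>

// Hypothetical: compile a kernel string into a code-object operation.
migraphx::operation compile_example(const migraphx::shape& in, const migraphx::shape& out)
{
    migraphx::gpu::hip_compile_options options;
    options.global      = 1024;             // total work-items: placeholder
    options.local       = 256;              // work-items per group: placeholder
    options.inputs      = {in};
    options.output      = out;
    options.kernel_name = "example_kernel";
    std::string src     = "/* kernel source elided */";
    return migraphx::gpu::compile_hip_code_object(src, options);
}
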
......@@ -38,7 +38,8 @@ struct context;
struct compile_ops
{
context* ctx = nullptr;
context* ctx = nullptr;
bool exhaustive_tune = false;
std::string name() const { return "gpu::compile_ops"; }
void apply(module& m) const;
};
......
......@@ -30,6 +30,8 @@
#include <migraphx/value.hpp>
#include <migraphx/module.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/optional.hpp>
#include <migraphx/rank.hpp>
#include <functional>
namespace migraphx {
......@@ -38,17 +40,58 @@ namespace gpu {
struct context;
using compiler_replace = std::function<void(module& m, instruction_ref ins)>;
using compiler_compile = std::function<compiler_replace(context&, instruction_ref, operation)>;
struct compiler_replace
{
compiler_replace() = default;
compiler_replace(const operation& op) : code_object{op} {}
template <class F>
compiler_replace(const operation& op, F f)
: code_object{op},
replace_fn([=](const compiler_replace& cr, module& m, instruction_ref ins) {
f(m, ins, cr.code_object);
})
{
}
operation code_object = {};
std::function<void(const compiler_replace& cr, module& m, instruction_ref ins)> replace_fn =
nullptr;
void replace(module& m, instruction_ref ins) const
{
if(replace_fn)
replace_fn(*this, m, ins);
else
m.replace_instruction(ins, code_object, ins->inputs());
}
};
struct tuning_config
{
value problem;
std::vector<value> solutions;
};
using compiler_compile =
std::function<compiler_replace(context&, instruction_ref, operation, const value&)>;
using compiler_compile_op =
std::function<operation(context&, const std::vector<shape>& inputs, const value&)>;
using compiler_tuning_config =
std::function<optional<tuning_config>(context&, instruction_ref, const operation&)>;
void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop);
void register_compiler(const std::string& name,
compiler_compile c,
compiler_compile_op cop,
compiler_tuning_config ctg);
bool has_compiler_for(const std::string& name);
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op);
compiler_replace
compile(context& ctx, instruction_ref ins, const operation& op, const value& solution);
operation
compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v);
optional<tuning_config> get_tuning_config(context& ctx, instruction_ref ins, const operation& op);
template <class T>
void register_compiler()
......@@ -58,8 +101,11 @@ void register_compiler()
{
register_compiler(
name,
[=](auto&&... xs) { return c.compile(std::forward<decltype(xs)>(xs)...); },
[=](auto&&... xs) { return c.compile_op(std::forward<decltype(xs)>(xs)...); });
[=](auto&&... xs) {
return c.invoke_compile(rank<1>{}, std::forward<decltype(xs)>(xs)...);
},
[=](auto&&... xs) { return c.compile_op(std::forward<decltype(xs)>(xs)...); },
[=](auto&&... xs) { return c.get_tuning_config(std::forward<decltype(xs)>(xs)...); });
}
}
......@@ -78,12 +124,30 @@ using auto_register_compiler = auto_register<register_compiler_action, T>;
template <class Derived>
struct compiler : auto_register_compiler<Derived>
{
auto replace(const operation& op) const
const Derived& derived() const { return static_cast<const Derived&>(*this); }
optional<tuning_config> get_tuning_config(context&, instruction_ref, const operation&) const
{
return
[=](module& m, instruction_ref ins) { m.replace_instruction(ins, op, ins->inputs()); };
return nullopt;
}
operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }
template <class D = Derived>
auto invoke_compile(
rank<1>, context& ctx, instruction_ref ins, operation op, const value& solution) const
-> decltype(std::declval<D>().compile(ctx, ins, std::move(op), solution))
{
return derived().compile(ctx, ins, std::move(op), solution);
}
template <class D = Derived>
auto invoke_compile(
rank<0>, context& ctx, instruction_ref ins, operation op, const value& solution) const
-> decltype(std::declval<D>().compile(ctx, ins, std::move(op)))
{
assert(solution.empty());
(void)solution;
return derived().compile(ctx, ins, std::move(op));
}
};
} // namespace gpu
......
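
compiler_replace above now bundles the compiled code object with an optional replacement hook, and compilers can report tuning configs. A hedged sketch of a minimal compiler against this interface; the operation name, the names() hook, and the to_shapes helper follow how existing MIGraphX compilers look and are assumptions, not part of this diff.

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>
#include <string>
#include <vector>

// Hypothetical compiler built on the updated interface: compile() returns a
// compiler_replace whose custom hook decides how the instruction is rewritten.
struct example_compiler : migraphx::gpu::compiler<example_compiler>
{
    std::vector<std::string> names() const { return {"example_op"}; }

    migraphx::operation compile_op(migraphx::gpu::context&,
                                   const std::vector<migraphx::shape>&,
                                   const migraphx::value&) const
    {
        return migraphx::make_op("identity"); // stands in for a real code object
    }

    migraphx::gpu::compiler_replace
    compile(migraphx::gpu::context& ctx, migraphx::instruction_ref ins, migraphx::operation op) const
    {
        auto co = compile_op(ctx, migraphx::to_shapes(ins->inputs()), op.to_value());
        // The hook receives the code object back through compiler_replace.
        return {co,
                [](migraphx::module& m, migraphx::instruction_ref i, const migraphx::operation& obj) {
                    m.replace_instruction(i, obj, i->inputs());
                }};
    }
};

Because get_tuning_config is not overridden, the base class default returns nullopt and the three-argument compile() is selected through the rank<0> overload of invoke_compile.
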
......@@ -170,7 +170,9 @@ struct hip_device
std::size_t stream_id() const { return current_stream; }
std::string get_device_name() const { return device_props.gcnArchName; }
std::string get_device_name() const { return get_arch_name(device_props); }
std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); }
std::size_t get_device_major() const { return device_props.major; }
......
......@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/generate.hpp>
......
......@@ -27,10 +27,14 @@
#include <migraphx/config.hpp>
#include <string>
struct hipDeviceProp_t;
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
std::string get_arch_name(const hipDeviceProp_t& props);
std::string get_device_name();
int get_device_id();
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#define MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module_pass_manager;
namespace gpu {
struct fuse_ck
{
context* ctx = nullptr;
std::string name() const { return "gpu::fuse_ck"; }
void apply(module_pass_manager& mpm) const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_FUSE_CK_HPP
......@@ -34,6 +34,8 @@ struct module_pass_manager;
namespace gpu {
bool mlir_enabled();
struct fuse_mlir
{
context* ctx = nullptr;
......