"megatron/vscode:/vscode.git/clone" did not exist on "bcd605f8570ebeeb0436c115ebbfafc3c5a40ae5"
Commit 3f322644 authored by Alan Turner

Merge remote-tracking branch 'origin/develop' into ck-gsg

parents 53aee707 09aaa63e
......@@ -210,17 +210,15 @@ void program::compile(const target& t, compile_options options)
assert(not this->is_compiled());
this->impl->target_name = t.name();
this->impl->ctx = t.get_context();
if(enabled(MIGRAPHX_TRACE_COMPILE{}))
options.trace = tracer{std::cout};
options.trace(*this);
options.trace();
auto&& passes = t.get_passes(this->impl->ctx, options);
run_passes(*this, passes, options.trace);
auto mods = this->get_modules();
// Validate and finalize
for(const auto& mod : reverse(mods))
{
......@@ -381,7 +379,7 @@ std::vector<argument> generic_eval(const module* mod,
}));
}
assert(results.find(ins) != results.end());
if(not ins->get_shape().dynamic())
if(not ins->get_shape().any_of_dynamic())
{
assert(results.at(ins).get_shape() == ins->get_shape());
}
......
......@@ -44,7 +44,7 @@ bool skip_propogate(instruction_ref ins)
return false;
}
bool is_const(instruction_ref ins) { return ins->can_eval() and not skip_propogate(ins); }
bool is_const_ins(instruction_ref ins) { return ins->can_eval() and not skip_propogate(ins); }
void propagate_constant::apply(module& m) const
{
......@@ -54,14 +54,23 @@ void propagate_constant::apply(module& m) const
// Find instructions that can be evaluated to a literal
for(auto i : iterator_for(m))
{
if(is_const(i) and i != last)
const bool is_const = is_const_ins(i);
if(is_const and i != last)
continue;
std::copy_if(
i->inputs().begin(),
i->inputs().end(),
std::inserter(const_instrs, const_instrs.begin()),
[&](const instruction_ref ins) { return is_const(ins) and ins->name() != "@literal"; });
if(i == last and is_const)
{
const_instrs.insert(i);
}
else
{
std::copy_if(i->inputs().begin(),
i->inputs().end(),
std::inserter(const_instrs, const_instrs.begin()),
[&](const instruction_ref ins) {
return is_const_ins(ins) and ins->name() != "@literal";
});
}
}
// Compute literals in parallel
......
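For context, a minimal sketch of what this pass does to a module, not taken from the commit: any subexpression whose inputs can all be evaluated is computed ahead of time and replaced with an @literal. The helper function name and the exact add_literal/add_parameter overloads below are assumptions about the usual MIGraphX C++ API.

#include <migraphx/module.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/literal.hpp>

void fold_constants_example()
{
    migraphx::module m;
    migraphx::shape s{migraphx::shape::float_type, {1}};
    auto one = m.add_literal(migraphx::literal{s, {1.0f}});
    auto two = m.add_literal(migraphx::literal{s, {2.0f}});
    // can_eval() is true here: both inputs are literals
    auto sum = m.add_instruction(migraphx::make_op("add"), one, two);
    auto x   = m.add_parameter("x", s);
    // not constant: depends on the runtime parameter x
    m.add_instruction(migraphx::make_op("mul"), sum, x);

    // After the pass, uses of `sum` point at a precomputed @literal (3.0f);
    // a later dead_code_elimination pass would then drop the original add.
    migraphx::propagate_constant{}.apply(m);
}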
......@@ -329,15 +329,21 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
.def("is_compiled", &migraphx::program::is_compiled)
.def(
"compile",
[](migraphx::program& p, const migraphx::target& t, bool offload_copy, bool fast_math) {
[](migraphx::program& p,
const migraphx::target& t,
bool offload_copy,
bool fast_math,
bool exhaustive_tune) {
migraphx::compile_options options;
options.offload_copy = offload_copy;
options.fast_math = fast_math;
options.offload_copy = offload_copy;
options.fast_math = fast_math;
options.exhaustive_tune = exhaustive_tune;
p.compile(t, options);
},
py::arg("t"),
py::arg("offload_copy") = true,
py::arg("fast_math") = true)
py::arg("offload_copy") = true,
py::arg("fast_math") = true,
py::arg("exhaustive_tune") = false)
.def("get_main_module", [](const migraphx::program& p) { return p.get_main_module(); })
.def(
"create_module",
......
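On the C++ side, the binding's new parameter maps onto the same compile_options fields; a minimal sketch of the equivalent call, assuming p is an already-parsed migraphx::program and that the "gpu" target is available:

migraphx::compile_options options;
options.offload_copy    = true;   // copy inputs/outputs between host and device automatically
options.fast_math       = true;   // allow fast-math rewrites
options.exhaustive_tune = false;  // new: exhaustive kernel tuning, off by default
p.compile(migraphx::make_target("gpu"), options);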
......@@ -33,7 +33,17 @@ std::unordered_map<std::string, operation>& op_map()
static std::unordered_map<std::string, operation> m; // NOLINT
return m;
}
void register_op_init() { (void)op_map(); }
void register_op(const operation& op) { op_map()[op.name()] = op; }
void unregister_op(const std::string& op_name)
{
assert(op_map().count(op_name));
op_map().erase(op_name);
}
operation load_op(const std::string& name)
{
return at(op_map(), name, "Operator not found: " + name);
......
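unregister_op is the counterpart of register_op on the same static map and is what a dynamically loaded operator library would call on unload; a hedged sketch of a lookup, where the operator name is only an example:

// load_op resolves a name through op_map(); it throws "Operator not found: <name>" on a miss.
migraphx::operation op = migraphx::load_op("contiguous");
// unregister_op(name) asserts the name is currently registered, then erases it from the map.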
......@@ -21,26 +21,48 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <string>
#include <unordered_map>
#include <migraphx/register_target.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/dynamic_loader.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void store_target_lib(const dynamic_loader& lib)
{
static std::vector<dynamic_loader> target_loader;
target_loader.emplace_back(lib);
}
std::unordered_map<std::string, target>& target_map()
{
static std::unordered_map<std::string, target> m; // NOLINT
return m;
}
void register_target_init() { (void)target_map(); }
void unregister_target(const std::string& name)
{
assert(target_map().count(name));
target_map().erase(name);
}
void register_target(const target& t) { target_map()[t.name()] = t; }
target make_target(const std::string& name)
{
if(not contains(target_map(), name))
{
std::string target_name = "libmigraphx_" + name + ".so";
store_target_lib(dynamic_loader(target_name));
}
const auto it = target_map().find(name);
if(it == target_map().end())
{
MIGRAPHX_THROW("Requested target '" + name + "' is not enabled or not supported");
MIGRAPHX_THROW("Requested target '" + name + "' is not loaded or not supported");
}
return it->second;
}
......
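make_target now dlopens libmigraphx_<name>.so on first use and keeps the loader alive through store_target_lib; a hedged usage sketch, assuming one of the built-in target names:

// Resolve a target by name, loading its shared library on demand.
migraphx::target t = migraphx::make_target("gpu");   // loads libmigraphx_gpu.so if not yet registered
migraphx::context ctx = t.get_context();
// If the name still is not registered after the load attempt, make_target throws
// "Requested target '<name>' is not loaded or not supported".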
......@@ -104,19 +104,16 @@ void replace_allocate::apply(module& m) const
continue;
auto s = ins->get_shape();
if(not main_offload_copy and model.needs_out_params() and contains(mod_output_names, ins))
{
auto out_param = m.add_parameter(mod_output_names[ins], s);
m.replace_instruction(ins, out_param);
continue;
}
m.replace_instruction(
ins,
m.insert_instruction(ins,
make_op(model.name(), migraphx::value{{"shape", to_value(s)}})));
else
{
m.replace_instruction(ins,
make_op(model.name(), migraphx::value{{"shape", to_value(s)}}));
}
}
}
......
......@@ -327,10 +327,10 @@ struct stream_info
return [=](auto f) {
return fix<bool>([&](auto self, auto ins) {
return all_of(select(ins), [&](auto i) {
if(iweights.at(i) == 0)
return self(i);
else
if(has_stream(i))
return f(this->get_stream(i));
else
return self(i);
});
})(start);
};
......
......@@ -483,6 +483,17 @@ std::string shape::type_string() const { return name(this->type()); }
bool shape::dynamic() const { return not impl->m_dyn_dims.empty(); }
bool shape::any_of_dynamic() const
{
if(this->dynamic())
{
return true;
}
return std::any_of(this->sub_shapes().cbegin(), this->sub_shapes().cend(), [](auto s) {
return s.any_of_dynamic();
});
}
const std::vector<shape::dynamic_dimension>& shape::dyn_dims() const { return impl->m_dyn_dims; }
std::vector<std::size_t> shape::min_lens() const
......
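any_of_dynamic extends dynamic by also recursing into tuple sub-shapes, which is what the generic_eval change above relies on; a hedged sketch of the distinction, where the vector-of-sub-shapes tuple constructor and the {min, max} dynamic_dimension form are assumptions:

// A tuple shape whose second element has dynamic dimensions.
migraphx::shape static_elem{migraphx::shape::float_type, {2, 3}};
std::vector<migraphx::shape::dynamic_dimension> dds = {{2, 2}, {1, 6}};
migraphx::shape dyn_elem{migraphx::shape::float_type, dds};
migraphx::shape tuple_shape{std::vector<migraphx::shape>{static_elem, dyn_elem}};

// tuple_shape.dynamic()        -> false: the tuple itself has no top-level dynamic dims
// tuple_shape.any_of_dynamic() -> true:  one of its sub-shapes is dynamic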
......@@ -1009,7 +1009,7 @@ struct find_neg_unit_ops
auto ins = r.result;
auto c_in = r.instructions["x"];
auto neg = m.add_instruction(make_op("neg"), c_in);
auto neg = m.insert_instruction(ins, make_op("neg"), c_in);
m.replace_instruction(ins, neg);
}
};
......
......@@ -40,14 +40,11 @@ struct target
std::string name() const;
std::vector<pass> get_passes(migraphx::context& gctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; }
argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; }
argument allocate(const shape& s) const;
};
MIGRAPHX_REGISTER_TARGET(target);
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -23,7 +23,6 @@
*/
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/check_context.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_allocation.hpp>
......
......@@ -43,14 +43,11 @@ struct target
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; }
supported_segments find_supported(const_module_ref mod, support_metric m) const;
argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; }
argument allocate(const shape& s) const;
};
MIGRAPHX_REGISTER_TARGET(target);
} // namespace fpga
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -22,7 +22,7 @@
# THE SOFTWARE.
# ####################################################################################
list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip)
find_package(miopen)
# rocblas
......@@ -172,21 +172,6 @@ register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_gpu)
get_filename_component(CMAKE_CXX_COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH)
if(NOT CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+$")
find_program(MIGRAPHX_EXTRACT_KERNEL extractkernel
PATH_SUFFIXES bin
HINTS ${CMAKE_CXX_COMPILER_PATH}
PATHS
/opt/rocm/hip
/opt/rocm/hcc
/opt/rocm
)
endif()
message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "")
if(MIGRAPHX_ENABLE_MLIR)
......@@ -228,7 +213,6 @@ else()
target_compile_definitions(migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
"-DMIGRAPHX_HIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}"
"-DMIGRAPHX_EXTRACT_KERNEL=${MIGRAPHX_EXTRACT_KERNEL}"
)
if(DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
......@@ -244,8 +228,7 @@ get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
# TODO: Set default to HAS_FIND_2_API
set(MIGRAPHX_USE_FIND_2_API OFF CACHE BOOL "")
set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "")
if(MIGRAPHX_USE_FIND_2_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
......@@ -261,8 +244,6 @@ else()
message(STATUS "MIOpen does not have find mode api")
endif()
# Workaround broken rocblas headers
target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
......
......@@ -207,12 +207,6 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
#else // MIGRAPHX_USE_HIPRTC
bool is_hcc_compiler()
{
static const auto result = ends_with(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), "hcc");
return result;
}
bool is_hip_clang_compiler()
{
static const auto result = ends_with(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), "clang++");
......@@ -236,7 +230,7 @@ std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
{
assert(not srcs.empty());
if(not is_hcc_compiler() and not is_hip_clang_compiler())
if(not is_hip_clang_compiler())
MIGRAPHX_THROW("Unknown hip compiler: " +
std::string(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER)));
......@@ -246,16 +240,9 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
params += " -g";
params += " -c";
if(is_hcc_compiler())
{
params += " -amdgpu-target=" + arch;
}
else if(is_hip_clang_compiler())
{
params += " --offload-arch=" + arch;
params += " --cuda-device-only";
params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
}
params += " --offload-arch=" + arch;
params += " --cuda-device-only";
params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
if(enabled(MIGRAPHX_GPU_DEBUG{}))
params += " -DMIGRAPHX_DEBUG";
......@@ -270,24 +257,6 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
if(has_compiler_launcher())
compiler.launcher = MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_LAUNCHER);
#endif
if(is_hcc_compiler())
compiler.process = [&](const fs::path& obj_path) -> fs::path {
process{MIGRAPHX_STRINGIZE(MIGRAPHX_EXTRACT_KERNEL) + std::string{" -i "} +
obj_path.string()}
.cwd(obj_path.parent_path());
for(const auto& entry : fs::directory_iterator{obj_path.parent_path()})
{
const auto& hsaco_path = entry.path();
if(not fs::is_regular_file(hsaco_path))
continue;
if(hsaco_path.extension() != ".hsaco")
continue;
return hsaco_path;
}
MIGRAPHX_THROW("Missing hsaco");
};
if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
{
for(const auto& src : srcs)
......
......@@ -112,14 +112,8 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
#ifdef MIGRAPHX_USE_CLANG_TIDY
#define MIGRAPHX_DEVICE_SHARED
#else
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#undef tile_static
#define MIGRAPHX_DEVICE_SHARED __attribute__((tile_static))
#else
#define MIGRAPHX_DEVICE_SHARED __shared__
#endif
#endif
} // namespace device
} // namespace gpu
......
......@@ -27,6 +27,7 @@
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/env.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -35,6 +36,8 @@ struct module;
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_MLIR);
#ifdef MIGRAPHX_MLIR
struct mlir_conv
{
......@@ -143,7 +146,17 @@ struct find_conv_pointwise
void fuse_mlir::apply(module_pass_manager& mpm) const
{
#ifdef MIGRAPHX_MLIR
match::find_matches(mpm, find_conv_pointwise{});
const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
if(mlir_enabled)
{
match::find_matches(mpm, find_conv_pointwise{});
}
else
{
std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
"var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
<< std::endl;
}
#else
(void)mpm;
#endif
......
......@@ -30,6 +30,7 @@
#include <migraphx/gpu/hip.hpp>
#include <migraphx/env.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <unordered_map>
#include <memory>
......@@ -215,6 +216,10 @@ struct context
return *current_device;
}
bool get_exhaustive_tune_flag() const { return exhaustive_tune; }
void set_exhaustive_tune_flag(bool t) { exhaustive_tune = t; }
hip_device::stream& get_stream() { return get_current_device().get_stream(); }
hip_device::stream& get_stream(std::size_t n) { return get_current_device().get_stream(n); }
......@@ -273,7 +278,8 @@ struct context
auto v_streams = v.at("streams");
std::size_t n_streams = v_streams.without_key().to<std::size_t>();
this->current_device = std::make_shared<hip_device>(0, n_streams);
auto device = get_device_id();
this->current_device = std::make_shared<hip_device>(device, n_streams);
}
void wait_for(any_ptr queue)
......@@ -336,7 +342,8 @@ struct context
// TODO: Make this a vector to support multiple devices
std::shared_ptr<hip_device> current_device;
std::vector<shared<hip_event_ptr>> events;
bool measure_perf = false;
bool exhaustive_tune = false;
bool measure_perf = false;
// for event perf timing
shared<hip_event_ptr> start_event = nullptr;
shared<hip_event_ptr> stop_event = nullptr;
......
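The exhaustive_tune flag added here is presumably filled in from compile_options when the GPU target compiles a program, so that the MIOpen find calls below can consult it; a hedged sketch of that plumbing, not the commit's actual code:

// Propagate the user-facing option into the per-target context before lowering runs.
void configure_tuning(migraphx::gpu::context& ctx, const migraphx::compile_options& options)
{
    ctx.set_exhaustive_tune_flag(options.exhaustive_tune);
    // Ops such as miopen_convolution later read it back, e.g.
    //   find_solution(handle, problem, ctx.get_exhaustive_tune_flag());
}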
......@@ -27,7 +27,6 @@
#include <migraphx/shape.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
......@@ -175,8 +174,9 @@ struct miopen_convolution
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
solution_ptr = find_solution(
miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");
......@@ -233,7 +233,7 @@ struct miopen_convolution
&perf,
workspace.implicit(),
workspace_size,
false);
ctx.get_exhaustive_tune_flag());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() + " : find convolution failed");
algo = perf.fwd_algo;
......
......@@ -98,6 +98,13 @@ struct hip_sync_stream
return {};
return args.front();
}
std::ptrdiff_t output_alias(const std::vector<shape>& args) const
{
if(args.empty())
return -1;
return 0;
}
};
struct hip_copy_to_gpu
......
......@@ -33,6 +33,13 @@ inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
/**
* Compiler pass that makes GPU-specific instruction changes.
* * Copies to and from the device if `offload_copy` is true.
* * Maps instructions to their GPU-specific counterparts.
* * Inserts `allocate` instructions before GPU operators.
*/
struct lowering
{
context* ctx;
......