Commit 2ab23275 authored by Alan Turner

Merge remote-tracking branch 'origin/develop' into ck-integration-tuning

parents 2c3563dd 013d4829
@@ -24,6 +24,40 @@
 find_program(EMBED_LD ld)
 find_program(EMBED_OBJCOPY objcopy)
+
+if(LINUX)
+    option(EMBED_USE_LD "Use ld to embed data files" ON)
+else()
+    option(EMBED_USE_LD "Use ld to embed data files" OFF)
+endif()
+
+function(wrap_string)
+    set(options)
+    set(oneValueArgs VARIABLE AT_COLUMN)
+    set(multiValueArgs)
+    cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    string(LENGTH ${${PARSE_VARIABLE}} string_length)
+    math(EXPR offset "0")
+
+    while(string_length GREATER 0)
+        if(string_length GREATER ${PARSE_AT_COLUMN})
+            math(EXPR length "${PARSE_AT_COLUMN}")
+        else()
+            math(EXPR length "${string_length}")
+        endif()
+        string(SUBSTRING ${${PARSE_VARIABLE}} ${offset} ${length} line)
+        set(lines "${lines}\n${line}")
+        math(EXPR string_length "${string_length} - ${length}")
+        math(EXPR offset "${offset} + ${length}")
+    endwhile()
+
+    set(${PARSE_VARIABLE} "${lines}" PARENT_SCOPE)
+endfunction()
+
 function(generate_embed_source EMBED_NAME)
     set(options)
     set(oneValueArgs SRC HEADER)
@@ -46,14 +80,21 @@ function(generate_embed_source EMBED_NAME)
         list(GET PARSE_OBJECTS ${idx} OBJECT)
         set(START_SYMBOL "_binary_${SYMBOL}_start")
         set(END_SYMBOL "_binary_${SYMBOL}_end")
-        string(APPEND EXTERNS "
-extern const char ${START_SYMBOL}[];
-extern const char ${END_SYMBOL}[];
-")
+        if(EMBED_USE_LD)
+            string(APPEND EXTERNS "
+extern const char ${START_SYMBOL}[];
+extern const char ${END_SYMBOL}[];
+")
+        else()
+            string(APPEND EXTERNS "
+extern const char ${START_SYMBOL}[];
+extern const char* ${END_SYMBOL};
+")
+        endif()
         # TODO: Should use NAME_WLE
         get_filename_component(BASE_NAME "${OBJECT}" NAME)
-        string(REGEX REPLACE ".[A-Za-z0-9_]$" "" BASE_NAME ${BASE_NAME})
+        string(REGEX REPLACE ".[A-Za-z0-9_]+$" "" BASE_NAME ${BASE_NAME})
         string(APPEND INIT_KERNELS "
     { \"${BASE_NAME}\", { ${START_SYMBOL}, ${END_SYMBOL}} },
@@ -86,17 +127,37 @@ function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
     string(MAKE_C_IDENTIFIER "${REL_FILE}" SYMBOL)
     get_filename_component(OUTPUT_FILE_DIR "${REL_FILE}" DIRECTORY)
     file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_FILE_DIR}")
-    set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${REL_FILE}.o")
+    if(EMBED_USE_LD)
+        set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${REL_FILE}.o")
+    else()
+        set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${REL_FILE}.cpp")
+    endif()
     set(${OUTPUT_SYMBOL} ${SYMBOL} PARENT_SCOPE)
     set(${OUTPUT_FILE} "${OUT_FILE}" PARENT_SCOPE)
-    add_custom_command(
-        OUTPUT "${OUT_FILE}"
-        COMMAND ${EMBED_LD} -r -o "${OUT_FILE}" -z noexecstack --format=binary "${REL_FILE}"
-        COMMAND ${EMBED_OBJCOPY} --rename-section .data=.rodata,alloc,load,readonly,data,contents "${OUT_FILE}"
-        WORKING_DIRECTORY ${WORKING_DIRECTORY}
-        DEPENDS ${FILE}
-        VERBATIM
-    )
+    if(EMBED_USE_LD)
+        add_custom_command(
+            OUTPUT "${OUT_FILE}"
+            COMMAND ${EMBED_LD} -r -o "${OUT_FILE}" -z noexecstack --format=binary "${REL_FILE}"
+            COMMAND ${EMBED_OBJCOPY} --rename-section .data=.rodata,alloc,load,readonly,data,contents "${OUT_FILE}"
+            WORKING_DIRECTORY ${WORKING_DIRECTORY}
+            DEPENDS ${FILE}
+            VERBATIM
+        )
+    else()
+        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${FILE})
+        # reads source file contents as hex string
+        file(READ ${FILE} HEX_STRING HEX)
+        # wraps the hex string into multiple lines
+        wrap_string(VARIABLE HEX_STRING AT_COLUMN 80)
+        # adds '0x' prefix and comma suffix before and after every byte respectively
+        string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1, " ARRAY_VALUES ${HEX_STRING})
+        # removes trailing comma
+        string(REGEX REPLACE ", $" "" ARRAY_VALUES ${ARRAY_VALUES})
+        file(WRITE "${OUT_FILE}" "
+extern const char _binary_${SYMBOL}_start[] = { ${ARRAY_VALUES} };
+extern const char* _binary_${SYMBOL}_end = _binary_${SYMBOL}_start + sizeof(_binary_${SYMBOL}_start);
+\n")
+    endif()
 endforeach()
 endfunction()
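For reference, the non-ld path writes an ordinary C++ source file that the embed
library then compiles. A minimal sketch of what the file(WRITE ...) above would
produce for a hypothetical four-byte input kernel.cl (byte values illustrative):

    // generated kernel.cl.cpp (illustrative)
    extern const char _binary_kernel_cl_start[] = { 0x2f, 0x2f, 0x0a, 0x0a };
    extern const char* _binary_kernel_cl_end =
        _binary_kernel_cl_start + sizeof(_binary_kernel_cl_start);

This is also why the EXTERNS branch above declares the end marker differently:
with ld/objcopy, _binary_*_end is a linker symbol (an array address), while the
generated source defines it as a pointer one past the embedded array.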
@@ -119,6 +180,6 @@ function(add_embed_library EMBED_NAME)
     generate_embed_source(${EMBED_NAME} SRC ${SRC_FILE} HEADER ${HEADER_FILE} OBJECTS ${OUTPUT_FILES} SYMBOLS ${SYMBOLS})
     add_library(${EMBED_NAME} STATIC ${OUTPUT_FILES} "${SRC_FILE}")
     target_include_directories(${EMBED_NAME} PUBLIC "${EMBED_DIR}/include")
-    target_compile_options(${EMBED_NAME} PRIVATE -Wno-reserved-identifier)
+    target_compile_options(${EMBED_NAME} PRIVATE -Wno-reserved-identifier -Wno-extern-initializer -Wno-missing-variable-declarations)
     set_target_properties(${EMBED_NAME} PROPERTIES POSITION_INDEPENDENT_CODE On)
 endfunction()
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -148,6 +148,18 @@ shape common_shape(const std::vector<shape>& shapes)
     return {compute_common_types(shapes), compute_common_lens(shapes)};
 }
+
+/**
+ * @brief Creates and adds instructions to convert input arguments to common shapes and types
+ * by adding multi-broadcast and type convert operations. This is a utility function for creating
+ * operations where the shape and type of inputs need to match. It supports both dynamic and
+ * static-shaped arguments.
+ *
+ * @param m containing module for instruction
+ * @param ins insertion location in instruction list
+ * @param inputs instructions to use as argument list; also, the shapes
+ * attached to each instruction_ref are considered for broadcasting
+ * @return std::vector<instruction_ref> a modified argument list
+ */
 std::vector<instruction_ref>
 insert_common_args(module& m, instruction_ref ins, std::vector<instruction_ref> inputs)
 {
@@ -158,7 +170,7 @@ insert_common_args(module& m, instruction_ref ins, std::vector<instruction_ref>
     if(inputs.size() != 2)
     {
         MIGRAPHX_THROW("INSERT_COMMON_OP: not handled; " + migraphx::to_string(inputs.size()) +
-                       "inputs, only handle two inputs if any are dynamic shape");
+                       " inputs. Requires exactly two inputs if any are dynamic shape");
     }
     auto c_type = compute_common_types(to_shapes(inputs));
@@ -224,6 +236,9 @@ instruction_ref insert_common_op(module& m,
     return m.insert_instruction(ins, op, insert_common_args(m, ins, std::move(inputs)));
 }
+/**
+ * Wrapper for insert_common_op() that inserts the operation at the end of the module.
+ */
 instruction_ref add_common_op(module& m, const operation& op, std::vector<instruction_ref> inputs)
 {
     return insert_common_op(m, m.end(), op, std::move(inputs));
...
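As a usage sketch of add_common_op (hypothetical shapes, assuming the public
module API), adding a {3} bias to a {2, 3} tensor lets the helper deduce and
insert the needed multibroadcast before the add:

    // minimal sketch; shapes and names are illustrative
    migraphx::module m;
    auto x    = m.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {2, 3}});
    auto bias = m.add_parameter("bias", migraphx::shape{migraphx::shape::float_type, {3}});
    auto sum  = migraphx::add_common_op(m, migraphx::make_op("add"), {x, bias});
    // sum has shape {2, 3}; bias was multibroadcast behind the scenes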
@@ -455,8 +455,29 @@ struct compiler
     {
         auto p = l.load();
         // Dont compile if its already been compiled
         if(p.is_compiled())
+        {
+            if(ct.target_name == "gpu")
+            {
+                if(is_offload_copy_set(p) and not co.offload_copy)
+                {
+                    std::cout << "MIGraphX program was likely compiled with offload_copy "
+                                 "set. Try passing `--enable-offload-copy` if the program "
+                                 "run fails.\n";
+                }
+                else if(co.offload_copy)
+                {
+                    std::cout << "MIGraphX program was likely compiled without offload_copy "
+                                 "set. Try removing the `--enable-offload-copy` driver flag "
+                                 "if the program run fails.\n";
+                }
+            }
             return p;
+        }
         auto t = ct.get_target();
         if(to_fp16)
         {
...
@@ -24,6 +24,8 @@
 #include "perf.hpp"
 #include <migraphx/generate.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/instruction_ref.hpp>
 #include <migraphx/register_target.hpp>
 #ifdef HAVE_GPU
 #include <migraphx/gpu/hip.hpp>
@@ -97,6 +99,38 @@ target get_target(bool gpu)
     return make_target("cpu");
 }
+
+bool is_offload_copy_set(const program& p)
+{
+    assert(p.is_compiled());
+    const module* mm                     = p.get_main_module();
+    std::vector<std::string> param_names = mm->get_parameter_names();
+    std::unordered_set<instruction_ref> param_ins;
+    std::transform(param_names.begin(),
+                   param_names.end(),
+                   std::inserter(param_ins, param_ins.begin()),
+                   [&](const auto& i) { return mm->get_parameter(i); });
+    for(const auto& i : *mm)
+    {
+        if(i.name() == "hip::copy_to_gpu")
+        {
+            auto copy_arg = instruction::get_output_alias(i.inputs().front(), true);
+            param_ins.erase(copy_arg);
+        }
+        else if(i.name() == "@return")
+        {
+            auto return_args = i.inputs();
+            for(const auto& j : return_args)
+            {
+                auto alias_ins = instruction::get_output_alias(j, true);
+                if((alias_ins->name() == "@param" and param_ins.erase(alias_ins) == 0) or
+                   (alias_ins->name() != "hip::copy_from_gpu"))
+                    return false;
+            }
+        }
+    }
+    return param_ins.empty();
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace driver
 } // namespace migraphx
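The heuristic mirrors what a program compiled with offload_copy looks like in
MIGraphX IR: every parameter is consumed by a hip::copy_to_gpu, and every
returned value aliases a hip::copy_from_gpu. Illustrative shape of such a
program (not exact IR syntax):

    x  = @param:x
    gx = hip::copy_to_gpu(x)
    y  = gpu::add(gx, ...)
    hy = hip::copy_from_gpu(y)
    @return(hy)

If any parameter is never copied to the GPU, or a returned value aliases a bare
@param or anything other than a hip::copy_from_gpu, the function reports false.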
@@ -39,6 +39,15 @@ parameter_map create_param_map(const program& p, const target& t, bool offload =
 parameter_map fill_param_map(parameter_map& m, const program& p, bool gpu);
 parameter_map create_param_map(const program& p, bool gpu = true);
 target get_target(bool gpu);
+
+/**
+ * @brief Checks whether a MIGraphX program compiled for the GPU backend has offload_copy
+ * set or not. This is intended to print a hint for users and may not correctly classify
+ * every compiled program as with or without offload_copy.
+ * @param p Compiled MIGraphX program for the GPU backend
+ * @return true if the program is classified as compiled with offload_copy set
+ */
+bool is_offload_copy_set(const program& p);
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace driver
...
@@ -135,14 +135,14 @@ template <class M>
 auto bind_match(M m, std::string name)
 {
     return make_function_matcher(
-        [=, name = std::move(name)](matcher_context& ctx,
-                                    instruction_ref ins) -> optional<instruction_ref> {
+        [=, m_name = std::move(name)](matcher_context& ctx,
+                                      instruction_ref ins) -> optional<instruction_ref> {
             auto result = m.match(ctx, ins);
             if(result)
             {
                 if(not ctx.has_instruction(ins))
                     return nullopt;
-                ctx.instructions[name] = ins;
+                ctx.instructions[m_name] = ins;
             }
             return result;
         });
@@ -655,9 +655,9 @@ auto skip_output(Ms... ms)
 inline auto var(std::string s)
 {
     return make_basic_fun_matcher(
-        [=, s = std::move(s)](const matcher_context& ctx,
-                              instruction_ref) -> optional<instruction_ref> {
-            auto it = ctx.instructions.find(s);
+        [=, m_s = std::move(s)](const matcher_context& ctx,
+                                instruction_ref) -> optional<instruction_ref> {
+            auto it = ctx.instructions.find(m_s);
             if(it == ctx.instructions.end())
                 return nullopt;
             return it->second;
@@ -667,7 +667,7 @@ inline auto var(std::string s)
 inline auto name(std::string s)
 {
     return make_basic_pred_matcher(
-        [=, s = std::move(s)](instruction_ref ins) { return ins->name() == s; });
+        [=, m_s = std::move(s)](instruction_ref ins) { return ins->name() == m_s; });
 }

 inline auto name_contains(const std::string& name)
@@ -678,8 +678,8 @@ inline auto name_contains(const std::string& name)
 inline auto name(std::unordered_set<std::string> names)
 {
-    return make_basic_pred_matcher([=, names = std::move(names)](instruction_ref ins) {
-        return names.count(ins->name()) > 0;
+    return make_basic_pred_matcher([=, m_names = std::move(names)](instruction_ref ins) {
+        return m_names.count(ins->name()) > 0;
     });
 }
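The m_ renames address capture shadowing: an init-capture such as
[s = std::move(s)] introduces a new lambda-local variable named s that shadows
the enclosing function's parameter, which shadow diagnostics flag. A minimal
illustration (hypothetical helper, not from this codebase):

    // [tag = std::move(tag)] would shadow the parameter 'tag' inside the lambda
    auto make_tagger(std::string tag)
    {
        return [m_tag = std::move(tag)](const std::string& s) { return m_tag + s; };
    }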
...
@@ -68,6 +68,9 @@ struct broadcast
 {
     // the ONNX broadcast op is deprecated now, so not handling the negative
     // value of axis anymore
+    if(s0.dynamic())
+        MIGRAPHX_THROW(
+            "BROADCAST: Single dynamic input shape not supported. Use two inputs.");
     if(axis >= broadcast_lens.size())
     {
         MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
...
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -37,8 +37,10 @@ namespace op {
 /**
  * Broadcast multiple dimensions between two tensors.
  * Two versions of this operator: one input and two inputs.
- * One input version uses output_lens attribute and broadcasts to it.
- * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ * One input version uses output_lens attribute and broadcasts to it (does not support
+ * dynamic shape input).
+ *
+ * Two inputs version broadcasts the first input to the common shape of the two inputs.
  */
 struct multibroadcast
 {
@@ -81,6 +83,9 @@ struct multibroadcast
         if(inputs.size() == 1)
         {
+            if(s0.dynamic())
+                MIGRAPHX_THROW(
+                    "MULTIBROADCAST: Single dynamic input shape not supported. Use two inputs.");
             if(s0.lens().size() > output_lens.size())
             {
                 MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
...
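A sketch of the two-input form (hypothetical shapes, assuming the public module
API), which computes the common shape when the program runs and therefore also
works when dimensions are dynamic:

    // minimal sketch; the common shape {2, 4, 3} is deduced from both inputs
    auto a   = m.add_parameter("a", migraphx::shape{migraphx::shape::float_type, {2, 1, 3}});
    auto b   = m.add_parameter("b", migraphx::shape{migraphx::shape::float_type, {1, 4, 3}});
    auto out = m.add_instruction(migraphx::make_op("multibroadcast"), a, b);
    // out is the first input broadcast to lens {2, 4, 3}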
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/optional.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -96,9 +97,115 @@ struct reshape
         return {s0.type(), output_dyn_dims};
     }
+
+    template <class Iterator>
+    static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim)
+    {
+        std::size_t x = 1;
+        auto it       = std::find_if(start, last, [&](auto i) {
+            x *= i;
+            return x >= dim;
+        });
+        if(x != dim)
+            return start;
+        return it;
+    }
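As a worked example of compute_end_dim: scanning dims {2, 3, 4} for dim = 6
accumulates 2, then 2*3 = 6, so the iterator to the element 3 is returned and
the prefix {2, 3} can be merged into 6; for dim = 5, no prefix product equals
5, so start is returned to signal that no merge is possible.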
+    template <class DimIterator, class StrideIterator>
+    static auto can_strides_merge(DimIterator dim_start,
+                                  DimIterator dim_last,
+                                  StrideIterator stride_start,
+                                  StrideIterator stride_last)
+    {
+        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
+        auto cstride = *std::prev(stride_last);
+        return std::equal(std::make_reverse_iterator(dim_last),
+                          std::make_reverse_iterator(dim_start + 1),
+                          std::make_reverse_iterator(stride_last - 1),
+                          std::make_reverse_iterator(stride_start),
+                          [&](auto dim, auto stride) {
+                              cstride *= dim;
+                              return stride == cstride;
+                          });
+    }
+    // This will reshape the dimensions of the input shape to use the lens of
+    // `rdims`. If this can't be done without changing the memory layout then it
+    // will return nullopt.
+    static optional<shape> reshape_dims(const shape& input, const std::vector<std::size_t>& rdims)
+    {
+        if(input.standard())
+            return shape{input.type(), rdims};
+
+        const auto& idims    = input.lens();
+        const auto& istrides = input.strides();
+
+        std::vector<std::size_t> rstrides;
+        std::size_t i = 0;
+        std::size_t r = 0;
+        while(i < idims.size() and r < rdims.size())
+        {
+            auto idim = idims[i];
+            auto rdim = rdims[r];
+            if(rdim == idim)
+            {
+                rstrides.push_back(istrides[i]);
+            }
+            // squeeze
+            else if(rdim > idim)
+            {
+                auto start = idims.begin() + i;
+                auto it    = compute_end_dim(start, idims.end(), rdim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((i + n) <= istrides.size());
+                if(not can_strides_merge(
+                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
+                    return nullopt;
+                i += n;
+                rstrides.push_back(istrides[i]);
+            }
+            // unsqueeze
+            else // if(rdim < idim)
+            {
+                auto start = rdims.begin() + r;
+                auto it    = compute_end_dim(start, rdims.end(), idim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((r + n) <= rdims.size());
+                auto stride = istrides[i] * idim;
+                std::for_each(start, it + 1, [&](auto dim) {
+                    stride /= dim;
+                    rstrides.push_back(stride);
+                });
+                r += n;
+            }
+            i++;
+            r++;
+        }
+
+        // Handle trailing 1s
+        if(rstrides.size() < rdims.size() and not rstrides.empty())
+        {
+            auto stride = rstrides.back();
+            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
+            {
+                if(d != 1)
+                    return nullopt;
+                rstrides.push_back(stride);
+            }
+        }
+
+        if(rdims.size() != rstrides.size())
+            return nullopt;
+
+        return shape{input.type(), rdims, rstrides};
+    }
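A worked sketch of reshape_dims' behavior: a standard {2, 3, 4} input reshaped
to {6, 4} needs no data movement, so it yields a shape with strides {4, 1}. A
transposed view, e.g. lens {3, 2} with strides {1, 3}, cannot be flattened to
{6} in place, since merging {3, 2} requires contiguous strides of the form
{2, 1} scaled by a common factor; the call returns nullopt and
static_compute_shape below throws "Reshape on axis that is not packed."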
     shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
     {
-        check_shapes{inputs, *this}.standard();
+        check_shapes{inputs, *this}.has(1);
         auto&& idims = inputs.front().lens();
         std::vector<std::size_t> rdims(dims.begin(), dims.end());
@@ -125,12 +232,17 @@ struct reshape
             }
         }
-        shape s{inputs.front().type(), rdims};
-        if(s.elements() != inputs.front().elements())
+        auto s = reshape_dims(inputs.front(), rdims);
+        if(not s.has_value())
+            MIGRAPHX_THROW("Reshape on axis that is not packed.");
+
+        if(s->elements() != inputs.front().elements())
             MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " +
-                           std::to_string(s.elements()) + " elements whereas the input has " +
+                           std::to_string(s->elements()) + " elements whereas the input has " +
                            std::to_string(inputs.front().elements()));
-        return s;
+
+        assert(s->bytes() == inputs.front().bytes());
+        return *s;
     }

     shape compute_shape(std::vector<shape> inputs) const
...
@@ -183,7 +183,7 @@ struct shape
     const std::vector<std::size_t>& strides() const;
     /*!
-     * The number of dimensions in the shape.
+     * The number of dimensions in the shape, either static or dynamic.
      * Same as the number of indices required to get a data value.
      */
     std::size_t ndim() const;
...
@@ -149,6 +149,25 @@ instruction_ref onnx_parser::node_info::add_broadcastable_binary_op(const std::s
     return this->add_common_op(op_name, arg0, arg1);
 }
+
+/**
+ * @brief A wrapper for insert_common_args(), which constructs an argument list
+ * and inserts multibroadcast and convert ops to match the inputs to a common shape and type
+ * as required. The requested operation is placed after the added multibroadcast and convert
+ * ops, if any, so that their results are transparent to the programmer.
+ *
+ * Use add_common_op() to match input sizes when inputs may be
+ * either static or dynamic.
+ *
+ * @param op_name Name of the operation (op) to add; valid names are the same as
+ * for make_op()
+ *
+ * @param inputs vector of instruction_ref. The instructions for the new operator;
+ * multibroadcast and convert operations, if needed, are deduced from these too.
+ *
+ * @return instruction_ref The result of the requested operation.
+ */
 instruction_ref onnx_parser::node_info::add_common_op(const std::string& op_name,
                                                       std::vector<instruction_ref> inputs) const
 {
...
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -84,16 +84,17 @@ struct parse_instancenorm : op_parser<parse_instancenorm>
             MIGRAPHX_THROW(opd.op_name + ": invalid output type: " + std::to_string(dtype) +
                            ". Valid types are 1 (float), 10 (half), and 11 (double).");

-        auto ndims = dims.size();
+        bool dyn_input = x->get_shape().dynamic();
+        auto ndims     = x->get_shape().ndim();
         assert(ndims >= 2);
         auto kdims = ndims - 2;
         std::vector<int64_t> axes(kdims);
         std::iota(axes.begin(), axes.end(), 2);

         auto mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x);
-        auto mean_bcast =
-            info.add_instruction(make_op("multibroadcast", {{"out_lens", dims}}), mean);
-        auto l1 = info.add_instruction(make_op("sub"), x, mean_bcast);
+        // Use add_common_op() to insert multibroadcast/convert instructions where needed when
+        // inputs may be either static or dynamic.
+        auto l1 = info.add_common_op("sub", x, mean);
         // for the fp16, if not converting to fp32 then divide `x` and `mean` by `sqrt(n)` and take
         // reduce_sum to calculate variance i.e.
         // var = reduce_sum((x/s_n - mean/s_n)^2) where s_n = sqrt(n)
@@ -107,23 +108,32 @@ struct parse_instancenorm : op_parser<parse_instancenorm>
             });
             n = 1.0 / std::sqrt(n);
             auto n_literal = info.add_literal(literal{dtype, {n}});
-            mean_bcast     = info.add_common_op("mul", {mean_bcast, n_literal});
             x              = info.add_common_op("mul", {x, n_literal});
         }
-        auto l0       = info.add_instruction(make_op("sqdiff"), x, mean_bcast);
+        auto l0       = info.add_common_op("sqdiff", x, mean);
         auto variance = info.add_instruction(make_op(reduce_op_name, {{"axes", axes}}), l0);
         auto epsilon_literal = info.add_literal(literal{shape{literal_dtype}, {epsilon}});
-        auto epsilon_bcast =
-            info.add_instruction(make_op("multibroadcast", {{"out_lens", dims}}), epsilon_literal);
-        auto variance_bcast =
-            info.add_instruction(make_op("multibroadcast", {{"out_lens", dims}}), variance);
-        auto l2 = info.add_instruction(make_op("add"), variance_bcast, epsilon_bcast);
+        auto l2 = info.add_common_op("add", variance, epsilon_literal);
         auto l3 = info.add_instruction(make_op("rsqrt"), l2);
-        auto l4 = info.add_instruction(make_op("mul"), l1, l3);
-        auto scale_bcast =
-            info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), scale);
-        auto bias_bcast =
-            info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), bias);
+        auto l4 = info.add_common_op("mul", l1, l3);
+
+        // add_common_op() doesn't apply the plain broadcast op, so we add that op explicitly for
+        // both scale and bias.
+        instruction_ref scale_bcast;
+        instruction_ref bias_bcast;
+        if(dyn_input)
+        {
+            scale_bcast = info.add_instruction(make_op("broadcast", {{"axis", 1}}), scale, x);
+            bias_bcast  = info.add_instruction(make_op("broadcast", {{"axis", 1}}), bias, x);
+        }
+        else
+        {
+            scale_bcast = info.add_instruction(
+                make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), scale);
+            bias_bcast =
+                info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), bias);
+        }
         auto l5  = info.add_instruction(make_op("mul"), l4, scale_bcast);
         auto ret = info.add_instruction(make_op("add"), l5, bias_bcast);
         if(dtype == shape::half_type and convert_fp16)
...
@@ -539,39 +539,53 @@ std::vector<argument> program::eval(parameter_map params, execution_environment
             ins_out[x] = ss.str();
         });

         ret = generic_eval(
             *this,
             ctx,
             std::move(params),
             with_check_context([&](auto& ins, auto f, auto&& check_context) {
                 ctx.finish();
                 std::cout << "Run instruction: " << ins_out.at(ins) << std::endl;
                 timer t{};
                 auto result = check_context(f);
                 double t1   = t.record<milliseconds>();
                 ctx.finish();
                 double t2 = t.record<milliseconds>();
                 std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
                 if(trace_level > 1 and ins->name().front() != '@' and ins->name() != "load" and
                    not result.empty())
                 {
-                    target tgt  = make_target(this->impl->target_name);
-                    auto buffer = tgt.copy_from(result);
+                    migraphx::argument buffer;
+                    try
+                    {
+                        target tgt = make_target(this->impl->target_name);
+                        buffer     = tgt.copy_from(result);
+                    }
+                    catch(const migraphx::exception&)
+                    {
+                        // the instruction ran on the host, so there is no need to copy the
+                        // buffer from the target
+                        buffer = result;
+                    }
+                    catch(...)
+                    {
+                        MIGRAPHX_THROW(
+                            "MIGraphX program execution with MIGRAPHX_TRACE_EVAL failed.\n");
+                    }
                     if(trace_level == 2)
                     {
                         std::cout << "Output has " << to_string_range(classify_argument(buffer))
                                   << std::endl;
                         std::cout << "Output: ";
                         preview_argument(std::cout, buffer);
                         std::cout << std::endl;
                     }
                     else
                     {
                         std::cout << "Output: " << buffer << std::endl;
                     }
                 }
                 return result;
             }));
     }
     else
     {
...
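The trace_level checked here corresponds to the MIGRAPHX_TRACE_EVAL environment
variable named in the new error message: level 2 prints a classification and
preview of each instruction's output, higher levels print the full buffer, and
fetching that buffer is what makes the host/target copy_from fallback above
necessary.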
@@ -35,12 +35,11 @@ endif()
 find_package(composable_kernel 1.0.0 COMPONENTS jit_library REQUIRED)

-set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
-#if(BUILD_DEV)
-#set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
-#else()
-#set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs")
-#endif()
+if(BUILD_DEV)
+    set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
+else()
+    set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs")
+endif()

 include(Embed)
 file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
...
@@ -146,7 +146,11 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
     gpu_sync();
     std::vector<T> result(sz);
     assert(not is_device_ptr(result.data()));
-    assert(is_device_ptr(x));
+    if(not is_device_ptr(x))
+    {
+        MIGRAPHX_THROW(
+            "read_from_gpu() requires the source buffer to be on the GPU; copy from gpu failed\n");
+    }
     auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
     if(status != hipSuccess)
         MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT
...
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include <fstream>
-#include <filesystem>
+#include <migraphx/filesystem.hpp>
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
...
@@ -188,10 +188,8 @@ MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, max, ::max)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, min, ::min)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, max, ::max)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, min, ::min)
-// Add overloads for half that calls the float version, this should use "hmax" and "hmin" once
-// perf CI docker is upgraded to rocm-5.5
-MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, max, ::fmaxf)
-MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, min, ::fminf)
+MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, max, ::__hmax)
+MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, min, ::__hmin)

 template <class T, MIGRAPHX_REQUIRES(not is_any_vec<T>())>
 constexpr auto max(const T& a, const T& b)
...
@@ -527,6 +527,9 @@ std::size_t value_hash(const std::string& key, const T& x)
     hash_combine(h, x);
     return h;
 }
+
+std::size_t value_hash(const std::string& key, std::nullptr_t) { return hash_value(key); }
+
 std::size_t value_hash(const std::string& key, const std::vector<value>& x)
 {
     std::size_t h = hash_value(key);
...
@@ -151,7 +151,7 @@ TEST_CASE(dynamic_batch_load_and_run_offload)
     c_options.set_offload_copy();
     p.compile(migraphx::target("gpu"), c_options);
     auto out_shapes = p.get_output_shapes();
-    CHECK(out_shapes.size() == 1);
+    EXPECT(out_shapes.size() == 1);
     EXPECT(out_shapes[0].dynamic());

     // batch size = 2
@@ -165,9 +165,9 @@ TEST_CASE(dynamic_batch_load_and_run_offload)
         migraphx::argument(migraphx::shape(migraphx_shape_float_type, {2, 3, 3, 3}), c.data()));
     auto outputs = p.eval(pp);
-    CHECK(shapes_before.size() == outputs.size());
-    CHECK(bool{outputs.front().get_shape() ==
-               migraphx::shape(migraphx_shape_float_type, {2, 1, 3, 3})});
+    EXPECT(shapes_before.size() == outputs.size());
+    EXPECT(bool{outputs.front().get_shape() ==
+                migraphx::shape(migraphx_shape_float_type, {2, 2, 2, 2})});
 }

 TEST_CASE(load_and_run_async)
...
@@ -3341,6 +3341,39 @@ def instance_norm_type_mismatch_test():
     return ([node], [x, scale, bias], [y])


+@onnx_test()
+def instance_norm_dyn_batch_test():
+    # the batch size is a dynamic dimension
+    x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [None, 2, 3, 3])
+    scale = helper.make_tensor_value_info('1', TensorProto.FLOAT, [2])
+    bias = helper.make_tensor_value_info('2', TensorProto.FLOAT, [2])
+    y = helper.make_tensor_value_info('3', TensorProto.FLOAT, [None, 2, 3, 3])
+
+    node = onnx.helper.make_node('InstanceNormalization',
+                                 inputs=['0', '1', '2'],
+                                 outputs=['3'])
+
+    return ([node], [x, scale, bias], [y])
+
+
+@onnx_test()
+def instance_norm_dyn_batch_half_test():
+    # the batch size is a dynamic dimension
+    x = helper.make_tensor_value_info('0', TensorProto.FLOAT16,
+                                      [None, 2, 3, 3])
+    scale = helper.make_tensor_value_info('1', TensorProto.FLOAT16, [2])
+    bias = helper.make_tensor_value_info('2', TensorProto.FLOAT16, [2])
+    y = helper.make_tensor_value_info('3', TensorProto.FLOAT16,
+                                      [None, 2, 3, 3])
+
+    node = onnx.helper.make_node('InstanceNormalization',
+                                 inputs=['0', '1', '2'],
+                                 outputs=['3'])
+
+    return ([node], [x, scale, bias], [y])
+
+
 @onnx_test()
 def instance_norm_invalid_type_test():
     x = helper.make_tensor_value_info('0', TensorProto.INT32, [1, 2, 3, 3])
...