Commit 7702c20d authored by Paul's avatar Paul
Browse files

Merge

parents c362e7fa 9afce86d
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
#ifdef MIGRAPHX_MLIR
struct mlir_conv
{
operation op = make_op("convolution");
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"));
}
std::string name() const { return "gpu::mlir_conv"; }
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
check_shapes{inputs, *this}.standard();
if(mods.size() != 1)
MIGRAPHX_THROW("should have one submodule.");
if(inputs.size() < 2)
MIGRAPHX_THROW("should have at least two inputs.");
auto n = inputs.size();
return op.compute_shape({inputs[n - 2], inputs[n - 1]});
}
};
MIGRAPHX_REGISTER_OP(mlir_conv);
namespace {
struct find_conv_pointwise
{
// Find a convolution followed by a pointwise operation.
auto matcher() const
{
auto convolution =
match::skip(match::name("contiguous"))(match::name("convolution").bind("convolution"));
return match::name("pointwise")(match::any_of[match::inputs()](convolution.bind("x")));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto ins = r.result;
auto conv_ins = r.instructions["convolution"];
auto x_ins = r.instructions["x"]; // input after contiguous
auto* pm = ins->module_inputs().front();
auto names = pm->get_parameter_names();
// Whitelist pointwise operators
if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
return not contains({"@literal", "@param", "@return", "convolution", "add", "relu"},
i.name());
}))
return;
// Only fuse with fp32 for now
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
return i->get_shape().type() != shape::type_t::float_type;
}))
return;
std::sort(names.begin(), names.end());
module_ref mm = mpm.create_module("mlir_" + pm->name());
mm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> param_map;
auto x = mm->add_parameter("x" + std::to_string(names.size()),
conv_ins->inputs().at(0)->get_shape());
auto w = mm->add_parameter("x" + std::to_string(names.size() + 1),
conv_ins->inputs().at(1)->get_shape());
auto conv = mm->add_instruction(conv_ins->get_operator(), {x, w});
std::transform(names.begin(),
names.end(),
ins->inputs().begin(),
std::inserter(param_map, param_map.end()),
[&](auto name, auto input) {
if(input == x_ins)
return std::make_pair(pm->get_parameter(name), conv);
return std::make_pair(pm->get_parameter(name),
mm->add_parameter(name, input->get_shape()));
});
mm->add_return(mm->insert_instructions(mm->end(), pm, param_map));
std::vector<instruction_ref> inputs;
std::copy_if(ins->inputs().begin(),
ins->inputs().end(),
std::back_inserter(inputs),
[&](auto input) { return input != conv_ins; });
inputs.insert(inputs.end(), conv_ins->inputs().begin(), conv_ins->inputs().end());
mpm.get_module().replace_instruction(
ins, mlir_conv{conv_ins->get_operator()}, inputs, {mm});
}
};
} // namespace
#endif
void fuse_mlir::apply(module_pass_manager& mpm) const
{
#ifdef MIGRAPHX_MLIR
match::find_matches(mpm, find_conv_pointwise{});
#else
(void)mpm;
#endif
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -336,6 +336,7 @@ void move_standard_front(std::vector<instruction_ref>& args) ...@@ -336,6 +336,7 @@ void move_standard_front(std::vector<instruction_ref>& args)
auto gpu_name(const std::string& s) { return match::name("gpu::" + s); } auto gpu_name(const std::string& s) { return match::name("gpu::" + s); }
namespace {
struct find_layernorm struct find_layernorm
{ {
auto matcher() const { return match::layernorm(&gpu_name); } auto matcher() const { return match::layernorm(&gpu_name); }
...@@ -837,15 +838,6 @@ inline auto precompile_name(Strings... names) // NOLINT ...@@ -837,15 +838,6 @@ inline auto precompile_name(Strings... names) // NOLINT
}); });
} }
template <class... Ms>
auto conv_bias_pointwise(Ms... ms)
{
return precompile_name("pointwise")(
match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"),
fusable_conv(match::used_once()).bind("conv")),
ms...);
}
struct find_conv_bias struct find_conv_bias
{ {
context* ctx = nullptr; context* ctx = nullptr;
...@@ -1014,6 +1006,7 @@ struct find_commutative_broadcast ...@@ -1014,6 +1006,7 @@ struct find_commutative_broadcast
m.replace_instruction(ins, ins->get_operator(), args); m.replace_instruction(ins, ins->get_operator(), args);
} }
}; };
} // namespace
struct find_contiguous struct find_contiguous
{ {
......
...@@ -23,13 +23,13 @@ ...@@ -23,13 +23,13 @@
*/ */
#include <migraphx/gpu/hip.hpp> #include <migraphx/gpu/hip.hpp>
#include <migraphx/manage_ptr.hpp> #include <migraphx/manage_ptr.hpp>
#include <migraphx/register_op.hpp> #include <migraphx/register_op.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/contiguous.hpp> #include <migraphx/gpu/device/contiguous.hpp>
#include <miopen/miopen.h> #include <miopen/miopen.h>
#include <memory>
#include <mutex>
#include <vector> #include <vector>
namespace migraphx { namespace migraphx {
...@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr) ...@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr)
return result; return result;
} }
hip_ptr allocate_gpu(std::size_t sz, bool host = false) struct host_ptr_cache
{
std::unordered_map<void*, std::weak_ptr<void>> cache;
std::mutex m;
std::shared_ptr<void> get(void* ptr)
{
std::lock_guard<std::mutex> lock(m);
auto it = cache.find(ptr);
if(it != cache.end())
return it->second.lock();
return nullptr;
}
void put(const std::shared_ptr<void>& p)
{
std::lock_guard<std::mutex> lock(m);
cache[p.get()] = p;
}
};
static host_ptr_cache& get_host_ptr_cache()
{
static host_ptr_cache cache;
return cache;
}
std::shared_ptr<void> allocate_gpu(std::size_t sz, bool host = false)
{ {
if(sz > get_available_gpu_memory()) if(sz > get_available_gpu_memory())
MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz)); MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
void* result = nullptr; void* alloc_ptr = nullptr;
auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz); auto status = host ? hipHostMalloc(&alloc_ptr, sz) : hipMalloc(&alloc_ptr, sz);
if(status != hipSuccess) if(status != hipSuccess)
{ {
if(host) if(host)
...@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false) ...@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
else else
return allocate_gpu(sz, true); return allocate_gpu(sz, true);
} }
assert(result != nullptr); assert(alloc_ptr != nullptr);
return hip_ptr{result}; std::shared_ptr<void> result = share(hip_ptr{alloc_ptr});
if(host)
{
get_host_ptr_cache().put(result);
}
return result;
} }
hip_host_ptr register_on_gpu(void* ptr, std::size_t sz) std::shared_ptr<void> register_on_gpu(void* ptr, std::size_t sz)
{ {
std::shared_ptr<void> result = get_host_ptr_cache().get(ptr);
if(result)
{
return result;
}
auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped); auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPHX_THROW("Gpu register failed: " + hip_error(status)); MIGRAPHX_THROW("Gpu register failed: " + hip_error(status));
return hip_host_ptr{ptr}; result = share(hip_host_ptr{ptr});
get_host_ptr_cache().put(result);
return result;
} }
template <class T> template <class T>
...@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz) ...@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return result; return result;
} }
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) std::shared_ptr<void> write_to_gpu(const void* x, std::size_t sz, bool host = false)
{ {
gpu_sync(); gpu_sync();
auto result = allocate_gpu(sz, host); auto result = allocate_gpu(sz, host);
...@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x) ...@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x)
argument allocate_gpu(const shape& s, bool host) argument allocate_gpu(const shape& s, bool host)
{ {
auto p = share(allocate_gpu(s.bytes() + 1, host)); auto p = allocate_gpu(s.bytes() + 1, host);
return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }}; return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
} }
argument register_on_gpu(const argument& arg) argument register_on_gpu(const argument& arg)
{ {
auto arg_shared = arg.share(); auto arg_shared = arg.share();
auto p = share(register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes())); auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes());
return {arg_shared.get_shape(), [p, a = std::move(arg_shared)]() mutable { return {arg_shared.get_shape(),
return get_device_ptr(p.get()); [p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
}}; // namespace gpu }
} // namespace MIGRAPHX_INLINE_NS
argument to_gpu(const argument& arg, bool host) argument to_gpu(const argument& arg, bool host)
{ {
auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes(), host)); auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
return {arg.get_shape(), p}; return {arg.get_shape(), p};
} }
......
...@@ -38,12 +38,13 @@ struct context; ...@@ -38,12 +38,13 @@ struct context;
struct code_object_op struct code_object_op
{ {
value::binary code_object; value::binary code_object{};
std::string symbol_name; std::string symbol_name = "";
std::size_t global; std::size_t global = 0;
std::size_t local; std::size_t local = 0;
std::vector<shape> expected_inputs; std::vector<shape> expected_inputs{};
shape output; shape output{};
std::int64_t output_arg = -1;
kernel k{}; kernel k{};
template <class Self, class F> template <class Self, class F>
...@@ -66,9 +67,13 @@ struct code_object_op ...@@ -66,9 +67,13 @@ struct code_object_op
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
void finalize(context&, const shape&, const std::vector<shape>&); void finalize(context&, const shape&, const std::vector<shape>&);
std::int64_t get_output_arg(std::size_t n) const
{
return output_arg < 0 ? n + output_arg : output_arg;
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{ {
return shapes.size() - 1; return get_output_arg(shapes.size());
} }
friend std::ostream& operator<<(std::ostream& os, const code_object_op& op) friend std::ostream& operator<<(std::ostream& os, const code_object_op& op)
......
...@@ -39,20 +39,20 @@ struct miopen_deconvolution ...@@ -39,20 +39,20 @@ struct miopen_deconvolution
op::deconvolution op; op::deconvolution op;
shared<convolution_descriptor> cd; shared<convolution_descriptor> cd;
miopenConvFwdAlgorithm_t algo{}; miopenConvFwdAlgorithm_t algo{};
miopenHandle_t handle = nullptr; uint64_t solution_id = 0;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
// TODO: Add algo return pack_join(op::deconvolution::reflect(self.op, f),
return op::convolution::reflect(self.op, f); pack(f(self.solution_id, "solution_id")));
} }
std::string name() const { return "gpu::deconv"; } std::string name() const { return "gpu::deconv"; }
shape compute_shape(const std::vector<shape>& inputs) const; shape compute_shape(const std::vector<shape>& inputs) const;
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs); shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs); void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{ {
......
...@@ -33,6 +33,8 @@ namespace gpu { ...@@ -33,6 +33,8 @@ namespace gpu {
std::string get_device_name(); std::string get_device_name();
int get_device_id();
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_MLIR_CONV_HPP #ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_MLIR_CONV_HPP #define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
...@@ -30,18 +30,19 @@ ...@@ -30,18 +30,19 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
struct module; struct module_pass_manager;
namespace gpu { namespace gpu {
struct mlir_conv
struct fuse_mlir
{ {
context* ctx; context* ctx = nullptr;
std::string name() const { return "mlir::convolution"; } std::string name() const { return "gpu::fuse_mlir"; }
void apply(module& m) const; void apply(module_pass_manager& mpm) const;
}; };
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
#endif
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP #define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/quant_dot.hpp> #include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <utility> #include <utility>
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP #define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/quant_dot.hpp> #include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <utility> #include <utility>
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_MLIR_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_MLIR_HPP
#include <string>
#include <vector>
#include <migraphx/config.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/instruction_ref.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
std::string dump_mlir(const module& m);
code_object_op compile_mlir(const context& ctx, const module& m);
instruction_ref insert_mlir(module& m,
instruction_ref ins,
code_object_op co,
const std::vector<instruction_ref>& inputs);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_PERFDB_HPP
#define MIGRAPHX_GUARD_GPU_PERFDB_HPP
#include <migraphx/config.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/operation.hpp>
#include <string>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct problem_params
{
operation op;
std::vector<shape> inputs;
shape output;
};
std::string get_mlir_perf_for_conv(const problem_params& pp);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PERFDB_HPP
...@@ -41,7 +41,7 @@ struct miopen_quant_convolution ...@@ -41,7 +41,7 @@ struct miopen_quant_convolution
bool int8_x4_format = false; bool int8_x4_format = false;
shared<convolution_descriptor> cd; shared<convolution_descriptor> cd;
miopenConvFwdAlgorithm_t algo{}; miopenConvFwdAlgorithm_t algo{};
miopenHandle_t handle = nullptr; uint64_t solution_id = 0;
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
...@@ -55,7 +55,7 @@ struct miopen_quant_convolution ...@@ -55,7 +55,7 @@ struct miopen_quant_convolution
shape compute_shape(const std::vector<shape>& inputs) const; shape compute_shape(const std::vector<shape>& inputs) const;
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs); shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs); void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{ {
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp> #include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp> #include <migraphx/gpu/compile_hip_code_object.hpp>
...@@ -61,7 +84,7 @@ struct layernorm_compiler : compiler<layernorm_compiler> ...@@ -61,7 +84,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
} }
auto preloads = preload::broadcasts(axis, inputs); auto preloads = preload::broadcasts(axis, inputs);
auto relements = inputs[0].lens()[axis] / vec.size; auto relements = inputs[0].lens()[axis] / vec.size;
auto nelements = inputs.back().elements() / inputs[0].lens()[axis]; auto nelements = (inputs.back().elements() / inputs[0].lens()[axis]);
auto block_size = compute_block_size(relements, 256); auto block_size = compute_block_size(relements, 256);
hip_compile_options options; hip_compile_options options;
options.set_launch_params( options.set_launch_params(
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/mlir.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct mlir_compiler : compiler<mlir_compiler>
{
std::vector<std::string> names() const { return {"gpu::mlir_conv"}; }
operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }
compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
{
auto* smod = ins->module_inputs().front();
assert(smod->get_parameter_names().size() == ins->inputs().size() - 1);
return insert(compile_mlir(ctx, *smod));
}
compiler_replace insert(code_object_op co) const
{
return [co = std::move(co)](module& m, instruction_ref ins) {
auto mlir = insert_mlir(m, ins, co, ins->inputs());
m.replace_instruction(ins, mlir);
};
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/compiler.hpp> #include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp> #include <migraphx/gpu/compile_hip_code_object.hpp>
...@@ -52,12 +75,12 @@ struct softmax_compiler : compiler<softmax_compiler> ...@@ -52,12 +75,12 @@ struct softmax_compiler : compiler<softmax_compiler>
auto faxis = find_fast_axis({inputs.front()}); auto faxis = find_fast_axis({inputs.front()});
vectorize vec{}; vectorize vec{};
// Vectorize if the axis is a reduction axis // Vectorize if the axis is a reduction axis
if(inputs.back().lens()[faxis] == 1) if(faxis == axis)
{ {
vec = vectorize::elements(faxis, inputs); vec = vectorize::elements(faxis, inputs);
} }
auto relements = inputs[0].lens()[axis] / vec.size; auto relements = inputs[0].lens()[axis] / vec.size;
auto nelements = inputs.back().elements() / relements; auto nelements = (inputs.back().elements() / inputs[0].lens()[axis]);
auto block_size = compute_block_size(relements, 256); auto block_size = compute_block_size(relements, 256);
hip_compile_options options; hip_compile_options options;
options.set_launch_params( options.set_launch_params(
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <migraphx/kernels/hip.hpp> #include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/types.hpp> #include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/integral_constant.hpp> #include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/type_traits.hpp>
namespace migraphx { namespace migraphx {
...@@ -53,29 +54,51 @@ struct index ...@@ -53,29 +54,51 @@ struct index
return blockDim.x; // NOLINT return blockDim.x; // NOLINT
} }
#endif #endif
template <class N, class Stride>
static constexpr auto max_stride_iterations(N n, Stride stride)
{
return (n - _c<1>) / stride + _c<1>;
}
template <class F> template <class F, class N, class Stride>
__device__ void global_stride(index_int n, F f) const static constexpr void for_stride(index_int start, N n, Stride stride, F f)
{ {
const auto stride = nglobal(); if constexpr(not is_integral<N>{} and not is_integral<Stride>{} and
for(index_int i = global; i < n; i += stride) max_stride_iterations(n, stride) == 1)
{ {
f(i); if constexpr(stride > n)
{
if(start < n)
f(start);
}
else
{
f(start);
}
}
else
{
for(index_int i = start; i < n; i += stride)
{
f(i);
}
} }
} }
template <class F> template <class F, class N>
__device__ void local_stride(index_int n, F f) const __device__ void global_stride(N n, F f) const
{ {
const auto stride = nlocal(); for_stride(global, n, nglobal(), f);
for(index_int i = local; i < n; i += stride) }
{
f(i); template <class F, class N>
} __device__ void local_stride(N n, F f) const
{
for_stride(local, n, nlocal(), f);
} }
}; };
inline __device__ index make_index() inline __device__ __attribute__((const)) index make_index()
{ {
return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
} }
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP #ifndef MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP #define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
#include <migraphx/kernels/reduce.hpp> #include <migraphx/kernels/reduce.hpp>
...@@ -23,11 +46,9 @@ __device__ void generic_binary_layernorm( ...@@ -23,11 +46,9 @@ __device__ void generic_binary_layernorm(
F compute, BinOp op, Output output, Input1 input1, Input2 input2, Inputs... inputs) F compute, BinOp op, Output output, Input1 input1, Input2 input2, Inputs... inputs)
{ {
using reduce_output = reduce::with_axis<Input1, Axis>; using reduce_output = reduce::with_axis<Input1, Axis>;
constexpr auto relements =
get_shape_c<Input1>{}.elements() / get_shape_c<reduce_output>{}.elements();
MIGRAPHX_ASSERT(relements > 0);
reduce::block::run<reduce_output>([&](auto, auto r) { reduce::block::run<reduce_output>([&](auto, auto r) {
using value_type = typename Input1::type; using value_type = typename Input1::type;
constexpr auto relements = r.template elements<Input1>();
auto means = r.reduce(op::sum{}, make_array<value_type>(0, 0), [&](auto x1, auto x2) { auto means = r.reduce(op::sum{}, make_array<value_type>(0, 0), [&](auto x1, auto x2) {
auto x = op(x1, x2); auto x = op(x1, x2);
return make_array(x, x * x) / value_type{relements}; return make_array(x, x * x) / value_type{relements};
......
...@@ -90,7 +90,7 @@ struct lowest ...@@ -90,7 +90,7 @@ struct lowest
template <class T> template <class T>
constexpr operator T() const constexpr operator T() const
{ {
return numeric_lowest<T>(); return numeric_lowest<vec_type<T>>();
} }
}; };
...@@ -99,7 +99,7 @@ struct highest ...@@ -99,7 +99,7 @@ struct highest
template <class T> template <class T>
constexpr operator T() const constexpr operator T() const
{ {
return numeric_max<T>(); return numeric_max<vec_type<T>>();
} }
}; };
} // namespace migraphx } // namespace migraphx
......
...@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx) ...@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx)
{ {
return make_transform([=](auto f, auto... xs) { return make_transform([=](auto f, auto... xs) {
auto invoke = [=](auto... ys) { auto invoke = [=](auto... ys) {
__syncthreads(); if constexpr((Bs or ...))
__syncthreads();
f(ys...); f(ys...);
}; };
join(invoke, preload_copy<Bs>(idx, xs)...); join(invoke, preload_copy<Bs>(idx, xs)...);
......
...@@ -224,6 +224,18 @@ struct block ...@@ -224,6 +224,18 @@ struct block
idx.local_stride(x.get_shape().elements(), [&](auto j) { f(x[j], xs[j]...); }); idx.local_stride(x.get_shape().elements(), [&](auto j) { f(x[j], xs[j]...); });
}); });
} }
template <class Input>
constexpr auto elements() const
{
using reduce_type = decltype(slicer(Input{}));
using value_type = typename Input::type;
constexpr auto relements = get_shape_c<reduce_type>{}.elements();
if constexpr(vec_size<value_type>() > 1)
return relements * vec_size<value_type>();
else
return relements;
}
}; };
template <class Slicer> template <class Slicer>
...@@ -281,6 +293,13 @@ struct lane ...@@ -281,6 +293,13 @@ struct lane
} }
}); });
} }
template <class Input>
constexpr auto elements() const
{
using reduce_type = decltype(slicer(Input{}));
return get_shape_c<reduce_type>{}.elements();
}
}; };
template <class Slicer> template <class Slicer>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment