Commit 5aa0ba49 authored by Paul's avatar Paul
Browse files

Add a pass to eliminate workspace allocations

parent ef048a56
...@@ -7,13 +7,37 @@ ...@@ -7,13 +7,37 @@
namespace migraph { namespace migraph {
template<class T>
struct xorshf96_generator
{
unsigned long x=123456789;
unsigned long y=362436069;
unsigned long z=521288629;
constexpr T operator()()
{
unsigned long t = 0;
x ^= x << 16;
x ^= x >> 5;
x ^= x << 1;
t = x;
x = y;
y = z;
z = t ^ x ^ y;
return z;
}
};
template <class T> template <class T>
std::vector<T> generate_tensor_data(migraph::shape s, std::mt19937::result_type seed = 0) std::vector<T> generate_tensor_data(migraph::shape s, std::mt19937::result_type seed = 0)
{ {
std::vector<T> result(s.elements()); std::vector<T> result(s.elements());
std::mt19937 engine{seed}; std::mt19937 engine{seed};
std::uniform_real_distribution<> dist; std::uniform_real_distribution<> dist;
std::generate(result.begin(), result.end(), [&] { return dist(engine); }); // std::generate(result.begin(), result.end(), [&] { return dist(engine); });
std::generate(result.begin(), result.end(), xorshf96_generator<T>{});
return result; return result;
} }
......
...@@ -27,6 +27,7 @@ int main(int argc, char const* argv[]) ...@@ -27,6 +27,7 @@ int main(int argc, char const* argv[])
auto p = migraph::parse_onnx(file); auto p = migraph::parse_onnx(file);
std::cout << "Compiling ... " << std::endl; std::cout << "Compiling ... " << std::endl;
p.compile(migraph::gpu::target{}); p.compile(migraph::gpu::target{});
std::cout << "Allocating params ... " << std::endl;
auto m = create_param_map(p); auto m = create_param_map(p);
std::cout << "Running performance report ... " << std::endl; std::cout << "Running performance report ... " << std::endl;
p.perf_report(std::cout, 10, m); p.perf_report(std::cout, 10, m);
......
...@@ -245,8 +245,10 @@ void program::compile(const target& t) ...@@ -245,8 +245,10 @@ void program::compile(const target& t)
std::cout << "Pass: " << p.name() << std::endl; std::cout << "Pass: " << p.name() << std::endl;
p.apply(*this); p.apply(*this);
if(enabled(MIGRAPH_TRACE_COMPILE{})) if(enabled(MIGRAPH_TRACE_COMPILE{}))
std::cout << *this << std::endl << std::endl; std::cout << *this << std::endl;
#ifndef NDEBUG #ifndef NDEBUG
if(enabled(MIGRAPH_TRACE_COMPILE{}))
std::cout << "Validate ..." << std::endl;
auto invalid = this->validate(); auto invalid = this->validate();
if(invalid != impl->instructions.end()) if(invalid != impl->instructions.end())
{ {
...@@ -254,6 +256,8 @@ void program::compile(const target& t) ...@@ -254,6 +256,8 @@ void program::compile(const target& t)
MIGRAPH_THROW(p.name() + " pass produces invalid program at instruction " + MIGRAPH_THROW(p.name() + " pass produces invalid program at instruction " +
std::to_string(index) + ": " + invalid->op.name()); std::to_string(index) + ": " + invalid->op.name());
} }
if(enabled(MIGRAPH_TRACE_COMPILE{}))
std::cout << std::endl;
#endif #endif
} }
auto invalid = this->validate(); auto invalid = this->validate();
......
...@@ -18,6 +18,7 @@ target_link_libraries(migraph_device migraph hip::device) ...@@ -18,6 +18,7 @@ target_link_libraries(migraph_device migraph hip::device)
target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
add_library(migraph_gpu add_library(migraph_gpu
eliminate_workspace.cpp
hip.cpp hip.cpp
target.cpp target.cpp
lowering.cpp lowering.cpp
......
#include <migraph/gpu/eliminate_workspace.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/program.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/ranges.hpp>
#include <migraph/stringutils.hpp>
namespace migraph {
namespace gpu {
void eliminate_workspace::apply(program& p) const
{
std::size_t n = 0;
std::vector<instruction_ref> allocs;
for(auto ins : iterator_for(p))
{
if(ins->output.size() != 1)
continue;
if(ins->op.name() != "hip::allocate")
continue;
auto&& a = any_cast<hip_allocate>(ins->op);
if(a.tag == "workspace")
{
n = std::max(n, ins->get_shape().bytes());
allocs.push_back(ins);
}
}
auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
for(auto&& a:allocs)
{
p.replace_instruction(a, ws);
p.remove_instruction(a);
}
}
} // namespace gpu
} // namespace migraph
#ifndef MIGRAPH_GUARD_RTGLIB_ELIMINATE_WORKSPACE_HPP
#define MIGRAPH_GUARD_RTGLIB_ELIMINATE_WORKSPACE_HPP
#include <string>
#include <migraph/instruction_ref.hpp>
namespace migraph {
struct program;
namespace gpu {
struct eliminate_workspace
{
std::string name() const { return "eliminate_workspace"; }
void apply(program& p) const;
};
} // namespace gpu
} // namespace migraph
#endif
...@@ -14,6 +14,7 @@ migraph::argument from_gpu(migraph::argument arg); ...@@ -14,6 +14,7 @@ migraph::argument from_gpu(migraph::argument arg);
struct hip_allocate struct hip_allocate
{ {
std::string tag{};
std::string name() const { return "hip::allocate"; } std::string name() const { return "hip::allocate"; }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
......
...@@ -125,7 +125,7 @@ struct miopen_convolution ...@@ -125,7 +125,7 @@ struct miopen_convolution
workspace_size, workspace_size,
false); false);
algo = perf.fwd_algo; algo = perf.fwd_algo;
return workspace_shape; return algo == miopenConvolutionFwdAlgoWinograd ? shape{shape::int8_type, {0}} : workspace_shape;
} }
}; };
...@@ -332,7 +332,7 @@ struct miopen_apply ...@@ -332,7 +332,7 @@ struct miopen_apply
} }
} }
instruction_ref insert_allocation(instruction_ref ins, const shape& s) instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag="")
{ {
if(ins == --prog->end()) if(ins == --prog->end())
{ {
...@@ -341,7 +341,7 @@ struct miopen_apply ...@@ -341,7 +341,7 @@ struct miopen_apply
else else
{ {
auto is = prog->add_outline(s); auto is = prog->add_outline(s);
auto result = prog->insert_instruction(ins, hip_allocate{}, is); auto result = prog->insert_instruction(ins, hip_allocate{tag}, is);
return result; return result;
} }
} }
...@@ -353,7 +353,7 @@ struct miopen_apply ...@@ -353,7 +353,7 @@ struct miopen_apply
auto conv = miopen_convolution{op, make_conv(op)}; auto conv = miopen_convolution{op, make_conv(op)};
auto ws = conv.compile(ctx, ins->result, ins->arguments); auto ws = conv.compile(ctx, ins->result, ins->arguments);
auto workspace = insert_allocation(ins, ws); auto workspace = insert_allocation(ins, ws, "workspace");
auto output = insert_allocation(ins, ins->result); auto output = insert_allocation(ins, ins->result);
prog->replace_instruction( prog->replace_instruction(
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <migraph/gpu/lowering.hpp> #include <migraph/gpu/lowering.hpp>
#include <migraph/gpu/write_literals.hpp> #include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp> #include <migraph/gpu/context.hpp>
#include <migraph/gpu/eliminate_workspace.hpp>
#include <migraph/check_context.hpp> #include <migraph/check_context.hpp>
#include <migraph/auto_contiguous.hpp> #include <migraph/auto_contiguous.hpp>
#include <migraph/dead_code_elimination.hpp> #include <migraph/dead_code_elimination.hpp>
...@@ -22,6 +23,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const ...@@ -22,6 +23,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
simplify_reshapes{}, simplify_reshapes{},
dead_code_elimination{}, dead_code_elimination{},
lowering{ctx}, lowering{ctx},
eliminate_workspace{},
eliminate_contiguous{}, eliminate_contiguous{},
dead_code_elimination{}, dead_code_elimination{},
write_literals{}, write_literals{},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment