Commit 81b0ff5d authored by Paul Fultz II's avatar Paul Fultz II Committed by mvermeulen
Browse files

Add option to do offload copying automatically (#403)

* Add compiler options

* Add copy operators

* Formatting

* Use run_passes in tests

* Formatting

* Use run_pass in schedule test

* Formatting

* Add compile_options to get_passes in target

* Formatting

* Offload copy option

* Formatting

* Copy using pinned memory

* Formatting

* Improve performance of gpu copying

* Formatting

* Don't copy

* Formatting

* Always make an extra copy

* Formatting

* Remove unused write op

* Add missing include

* Remove copy_to_gpu function in python api

* Make offload copy disabled by default on C++

* Formatting

* Fix tidy issues

* Formatting

* Fix namespace

* Fix python tests

* Turn clang-format off since it's broken

* Fix compile error on gcc 5

* Remove commented code
parent e814cffb
......@@ -87,8 +87,9 @@ struct compiler
static const int q_fp16 = 1;
static const int q_int8 = 2;
loader l;
bool gpu = true;
int quantize = 0;
bool gpu = true;
bool offload_copy = false;
int quantize = 0;
std::vector<std::string> fill1;
void parse(argument_parser& ap)
......@@ -96,6 +97,10 @@ struct compiler
l.parse(ap);
ap(gpu, {"--gpu"}, ap.help("Compile on the gpu"), ap.set_value(true));
ap(gpu, {"--cpu"}, ap.help("Compile on the cpu"), ap.set_value(false));
ap(offload_copy,
{"--enable-offload-copy"},
ap.help("Enable implicit offload copying"),
ap.set_value(false));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(q_fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8));
ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append());
......@@ -103,10 +108,11 @@ struct compiler
auto params(const program& p, bool use_gpu = true)
{
bool gpu_flag = use_gpu && gpu && !offload_copy;
program::parameter_map m;
for(auto&& s : fill1)
m[s] = fill_argument(p.get_parameter_shape(s), 1);
fill_param_map(m, p, use_gpu && gpu);
fill_param_map(m, p, gpu_flag);
return m;
}
......@@ -122,7 +128,9 @@ struct compiler
{
quantize_int8(p, t, {params(p, false)});
}
p.compile(t);
compile_options options;
options.offload_copy = offload_copy;
p.compile(t, options);
return p;
}
};
......
......@@ -9,7 +9,7 @@ argument fill_argument(shape s, unsigned long value)
s.visit_type([&](auto as) {
using type = typename decltype(as)::type;
auto v = fill_tensor_data<type>(s, value);
result = {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
result = {s, v};
});
return result;
}
......@@ -20,7 +20,7 @@ argument generate_argument(shape s, unsigned long seed)
s.visit_type([&](auto as) {
using type = typename decltype(as)::type;
auto v = generate_tensor_data<type>(s, seed);
result = {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
result = {s, v};
});
return result;
}
......@@ -31,7 +31,7 @@ literal generate_literal(shape s, unsigned long seed)
s.visit_type([&](auto as) {
using type = typename decltype(as)::type;
auto v = generate_tensor_data<type>(s, seed);
result = {s, v};
result = {s, reinterpret_cast<char*>(v.get())};
});
return result;
}
......
......@@ -28,13 +28,26 @@ struct argument : raw_data<argument>
data = [=]() mutable { return buffer.data(); };
}
argument(shape s, std::function<char*()> d) : data(std::move(d)), m_shape(std::move(s)) {}
// Construct from a nullary callable that yields a pointer to the data;
// the pointer is converted to char* on every access.
template <class F, MIGRAPHX_REQUIRES(std::is_pointer<decltype(std::declval<F>()())>{})>
argument(shape s, F d)
: data([f = std::move(d)]() mutable { return reinterpret_cast<char*>(f()); }),
m_shape(std::move(s))
{
}
// Construct from a raw pointer. Non-owning: only the pointer is captured,
// so the pointed-to storage must outlive this argument.
template <class T>
argument(shape s, T* d)
: data([d] { return reinterpret_cast<char*>(d); }), m_shape(std::move(s))
{
}
// Construct from a shared_ptr. The data function copies the shared_ptr,
// so it shares ownership and keeps the buffer alive while in use.
template <class T>
argument(shape s, std::shared_ptr<T> d)
: data([d] { return reinterpret_cast<char*>(d.get()); }), m_shape(std::move(s))
{
}
// Construct an argument that has a shape but no data: data() yields nullptr.
argument(shape s, std::nullptr_t) : data([] { return nullptr; }), m_shape(std::move(s)) {}
/// Provides a raw pointer to the data
std::function<char*()> data = nullptr;
......@@ -49,6 +62,13 @@ struct argument : raw_data<argument>
return {s, [=]() mutable { return self.data(); }};
}
/// Make a copy of the argument that always shares the underlying data.
/// The returned argument holds a heap-allocated shared copy of *this and
/// forwards every data() call to it.
argument share() const
{
return {m_shape,
[held = std::make_shared<argument>(*this)]() mutable { return held->data(); }};
}
private:
shape m_shape;
};
......
......@@ -2,6 +2,8 @@
#define MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP
#include <migraphx/shape.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/config.hpp>
#include <algorithm>
......@@ -48,11 +50,12 @@ struct check_shapes
return end - begin;
}
const check_shapes& has(std::size_t n) const
template <class... Ts>
const check_shapes& has(Ts... ns) const
{
if(size() != n)
MIGRAPHX_THROW(prefix() + "Wrong number of arguments: expected " + std::to_string(n) +
" but given " + std::to_string(size()));
if(migraphx::none_of({ns...}, [&](auto i) { return this->size() == i; }))
MIGRAPHX_THROW(prefix() + "Wrong number of arguments: expected " +
to_string_range({ns...}) + " but given " + std::to_string(size()));
return *this;
}
......
#ifndef MIGRAPHX_GUARD_RTGLIB_COMPILE_OPTIONS_HPP
#define MIGRAPHX_GUARD_RTGLIB_COMPILE_OPTIONS_HPP
#include <migraphx/config.hpp>
#include <migraphx/tracer.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct compile_options
{
// Enables implicit offload copying during compilation (data is copied
// to/from the offload target automatically); disabled by default.
bool offload_copy = false;
// Tracer invoked on the program during compilation passes.
tracer trace{};
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -78,20 +78,18 @@ struct xorshift_generator
};
template <class T>
std::vector<T> generate_tensor_data(const migraphx::shape& s, unsigned long seed = 0)
auto generate_tensor_data(const migraphx::shape& s, unsigned long seed = 0)
{
std::vector<T> result(s.elements());
std::generate(result.begin(), result.end(), xorshf96_generator<T>{seed});
// std::generate(result.begin(), result.end(), [&]{ return seed % 7; });
// std::generate(result.begin(), result.end(), []{ return 1; });
auto result = make_shared_array<T>(s.elements());
std::generate(result.get(), result.get() + s.elements(), xorshf96_generator<T>{seed});
return result;
}
template <class T>
std::vector<T> fill_tensor_data(const migraphx::shape& s, unsigned long value = 0)
auto fill_tensor_data(const migraphx::shape& s, unsigned long value = 0)
{
std::vector<T> result(s.elements());
std::generate(result.begin(), result.end(), [=] { return value; });
auto result = make_shared_array<T>(s.elements());
std::generate(result.get(), result.get() + s.elements(), [=] { return value; });
return result;
}
......
......@@ -8,7 +8,7 @@
#include <migraphx/builtin.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/target.hpp>
#include <migraphx/tracer.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/env.hpp>
#include <migraphx/config.hpp>
#include <algorithm>
......@@ -107,7 +107,7 @@ struct program
instruction_ref validate() const;
void compile(const target& t, tracer trace = tracer{});
void compile(const target& t, compile_options options = compile_options{});
void finalize();
......
......@@ -83,18 +83,30 @@ inline std::string remove_prefix(std::string s, const std::string& prefix)
return s;
}
template <class Range>
inline std::string to_string_range(const Range& r)
template <class Iterator>
inline std::string to_string_range(Iterator start, Iterator last)
{
std::stringstream ss;
if(!r.empty())
if(start != last)
{
ss << r.front();
std::for_each(std::next(r.begin()), r.end(), [&](auto&& x) { ss << ", " << x; });
ss << *start;
std::for_each(std::next(start), last, [&](auto&& x) { ss << ", " << x; });
}
return ss.str();
}
// Joins the elements of any range with ", " by delegating to the
// iterator-pair overload.
template <class Range>
inline std::string to_string_range(const Range& r)
{
return to_string_range(r.begin(), r.end());
}
// Overload for braced-init-lists, e.g. to_string_range({1, 2, 3});
// a braced list cannot deduce the generic Range template parameter.
template <class T>
inline std::string to_string_range(const std::initializer_list<T>& r)
{
return to_string_range(r.begin(), r.end());
}
template <class T>
inline std::string to_string(const T& x)
{
......
......@@ -11,6 +11,7 @@
#include <migraphx/context.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/config.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/rank.hpp>
......@@ -28,9 +29,10 @@ struct target
* @brief The transformation pass to be run during compilation.
*
* @param ctx This is the target-dependent context that is created by `get_context`
* @param options Compiling options passed in by the user
* @return The passes to be ran
*/
std::vector<pass> get_passes(context& ctx) const;
std::vector<pass> get_passes(context& ctx, const compile_options& options) const;
/**
* @brief Construct a context for the target.
* @return The context to be used during compilation and execution.
......@@ -122,7 +124,7 @@ argument copy_from_target(T& x, const argument& arg)
* struct target
* {
* std::string name() const;
* std::vector<pass> get_passes(context& ctx) const;
* std::vector<pass> get_passes(context& ctx,const compile_options& options) const;
* context get_context() const;
* argument copy_to(const argument& input) const;
* argument copy_from(const argument& input) const;
......@@ -194,10 +196,10 @@ struct target
return (*this).private_detail_te_get_handle().name();
}
std::vector<pass> get_passes(context& ctx) const
std::vector<pass> get_passes(context& ctx, const compile_options& options) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().get_passes(ctx);
return (*this).private_detail_te_get_handle().get_passes(ctx, options);
}
context get_context() const
......@@ -237,12 +239,13 @@ struct target
virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
virtual const std::type_info& type() const = 0;
virtual std::string name() const = 0;
virtual std::vector<pass> get_passes(context& ctx) const = 0;
virtual context get_context() const = 0;
virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0;
virtual std::string name() const = 0;
virtual std::vector<pass> get_passes(context& ctx,
const compile_options& options) const = 0;
virtual context get_context() const = 0;
virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0;
};
template <typename PrivateDetailTypeErasedT>
......@@ -275,10 +278,10 @@ struct target
std::string name() const override { return private_detail_te_value.name(); }
std::vector<pass> get_passes(context& ctx) const override
std::vector<pass> get_passes(context& ctx, const compile_options& options) const override
{
return private_detail_te_value.get_passes(ctx);
return private_detail_te_value.get_passes(ctx, options);
}
context get_context() const override { return private_detail_te_value.get_context(); }
......
......@@ -345,15 +345,15 @@ instruction_ref program::validate() const
[&](const instruction& i) { return !i.valid(impl->instructions.begin()); });
}
void program::compile(const target& t, tracer trace)
void program::compile(const target& t, compile_options options)
{
assert(this->validate() == impl->instructions.end());
this->impl->ctx = t.get_context();
if(enabled(MIGRAPHX_TRACE_COMPILE{}))
trace = tracer{std::cout};
trace(*this);
trace();
run_passes(*this, t.get_passes(this->impl->ctx), trace);
options.trace = tracer{std::cout};
options.trace(*this);
options.trace();
run_passes(*this, t.get_passes(this->impl->ctx, options), options.trace);
auto invalid = this->validate();
if(invalid != impl->instructions.end())
{
......
......@@ -159,7 +159,14 @@ PYBIND11_MODULE(migraphx, m)
.def("clone", [](migraphx::program& p) { return *(new migraphx::program(p)); })
.def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
.def("get_shape", &migraphx::program::get_shape)
.def("compile", [](migraphx::program& p, const migraphx::target& t) { p.compile(t); })
.def("compile",
[](migraphx::program& p, const migraphx::target& t, bool offload_copy) {
migraphx::compile_options options;
options.offload_copy = offload_copy;
p.compile(t, options);
},
py::arg("t"),
py::arg("offload_copy") = true)
.def("run", &migraphx::program::eval)
.def("__eq__", std::equal_to<migraphx::program>{})
.def("__ne__", std::not_equal_to<migraphx::program>{})
......@@ -199,7 +206,6 @@ PYBIND11_MODULE(migraphx, m)
m.def("to_gpu", &migraphx::gpu::to_gpu, py::arg("arg"), py::arg("host") = false);
m.def("from_gpu", &migraphx::gpu::from_gpu);
m.def("gpu_sync", &migraphx::gpu::gpu_sync);
m.def("copy_to_gpu", &migraphx::gpu::copy_to_gpu);
#endif
#ifdef VERSION_INFO
......
......@@ -2,6 +2,7 @@
#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
#include <migraphx/program.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/config.hpp>
......@@ -13,7 +14,7 @@ namespace cpu {
struct target
{
std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx) const;
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; }
argument copy_to(const argument& arg) const { return arg; }
......
......@@ -13,7 +13,7 @@ namespace cpu {
std::string target::name() const { return "cpu"; }
std::vector<pass> target::get_passes(migraphx::context&) const
std::vector<pass> target::get_passes(migraphx::context&, const compile_options&) const
{
return {rewrite_rnn{},
dead_code_elimination{},
......
......@@ -101,6 +101,7 @@ add_library(migraphx_gpu
int8_gemm_pack.cpp
int8_conv_pack.cpp
gemm_impl.cpp
preallocate_param.cpp
)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
rocm_set_soversion(migraphx_gpu ${PROJECT_VERSION})
......
......@@ -5,7 +5,6 @@
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/arg_op.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -5,7 +5,6 @@
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/arg_op.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -4,7 +4,6 @@
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -4,7 +4,6 @@
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -69,8 +68,6 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
});
}
void sync_stream(hipStream_t stream) { (void)hipStreamSynchronize(stream); }
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -5,7 +5,6 @@
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -6,7 +6,6 @@
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment