Unverified Commit 29fa2666 authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Add gpu driver and improvements to pointwise codegen (#851)



* Add method to compile pointwise

* Formatting

* Add lambda

* Add semicolon

* Rename variable

* Add driver to run jit kernels

* Formatting

* Add context

* Formatting

* Make seperate driver folder

* Add more general gpu driver

* Formatting

* Print out wll time

* Formatting

* Run multiple times and skip first run

* Formatting

* Seperate time_op

* Run an op for comparison

* Formatting

* Add debug asserts

* Formatting

* Change parameer name

* Formatting

* Fix argument order

* Formatting

* Add preloading

* Formatting

* Allow a different data type

* Formatting

* Pipeline transformations

* Formatting

* Add vectorization

* Formatting

* Reduce dims

* Formatting

* Compile with launch params as constant

* Formatting

* Make sure buffer can be vecotrized

* Formatting

* Enable vectorization and preloading

* Formatting

* Add print header

* Formatting

* Avoid allocating to large of LDS

* Formatting

* Add some vec functions to a seperate header

* Formatting

* Add stride loops

* Formatting

* Improve the transform pipeline

* Formatting

* Add const

* Fix shape check

* Formatting

* Just check stride axis is zero

* Remove extra finc_vector_axis overload

* Simplify some mroe functions

* Formatting

* Remove some more extra functions

* Formatting

* Simplify more decltypes

* Add another const

* Fix test

* Get buffer pointer different for older compilers
Co-authored-by: default avatarShucai Xiao <shucai@gmail.com>
Co-authored-by: default avatarChris Austen <causten@users.noreply.github.com>
parent 30966f6b
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <migraphx/errors.hpp> #include <migraphx/errors.hpp>
#include <migraphx/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraphx/convert_to_json.hpp> #include <migraphx/convert_to_json.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -77,6 +78,18 @@ std::vector<token> json_tokenize(const std::string& s) ...@@ -77,6 +78,18 @@ std::vector<token> json_tokenize(const std::string& s)
return ++start; return ++start;
}); });
// Line comments
lexers.push_back([](const char* start, const char* end) {
if(*start == '#')
start++;
else if((start + 1) < end and start[0] == '/' and start[1] == '/')
start += 2;
else
return start;
return std::find_if(start, end, [&](char c) { return c == '\n'; });
});
// Whitespace
lexers.push_back(lex_while(&isspace)); lexers.push_back(lex_while(&isspace));
// Punctation // Punctation
...@@ -98,6 +111,8 @@ std::string convert_to_json(const std::string& str) ...@@ -98,6 +111,8 @@ std::string convert_to_json(const std::string& str)
for(auto& token : tokens) for(auto& token : tokens)
{ {
std::string s(token.first, token.second); std::string s(token.first, token.second);
if(starts_with(s, "#") or starts_with(s, "//"))
continue;
if(std::isalpha(s.front()) != 0 and if(std::isalpha(s.front()) != 0 and
not contains({"null", "nan", "true", "false", "inf"}, s)) not contains({"null", "nan", "true", "false", "inf"}, s))
{ {
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
std::vector<char> read_buffer(const std::string& filename) template <class T>
T generic_read_file(const std::string& filename)
{ {
std::ifstream is(filename, std::ios::binary | std::ios::ate); std::ifstream is(filename, std::ios::binary | std::ios::ate);
std::streamsize size = is.tellg(); std::streamsize size = is.tellg();
...@@ -14,12 +15,22 @@ std::vector<char> read_buffer(const std::string& filename) ...@@ -14,12 +15,22 @@ std::vector<char> read_buffer(const std::string& filename)
MIGRAPHX_THROW("Invalid size for: " + filename); MIGRAPHX_THROW("Invalid size for: " + filename);
is.seekg(0, std::ios::beg); is.seekg(0, std::ios::beg);
std::vector<char> buffer(size); T buffer(size, 0);
if(!is.read(buffer.data(), size)) if(!is.read(&buffer[0], size))
MIGRAPHX_THROW("Error reading file: " + filename); MIGRAPHX_THROW("Error reading file: " + filename);
return buffer; return buffer;
} }
std::vector<char> read_buffer(const std::string& filename)
{
return generic_read_file<std::vector<char>>(filename);
}
std::string read_string(const std::string& filename)
{
return generic_read_file<std::string>(filename);
}
void write_buffer(const std::string& filename, const char* buffer, std::size_t size) void write_buffer(const std::string& filename, const char* buffer, std::size_t size)
{ {
std::ofstream os(filename); std::ofstream os(filename);
......
...@@ -9,6 +9,7 @@ namespace migraphx { ...@@ -9,6 +9,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
std::vector<char> read_buffer(const std::string& filename); std::vector<char> read_buffer(const std::string& filename);
std::string read_string(const std::string& filename);
void write_buffer(const std::string& filename, const char* buffer, std::size_t size); void write_buffer(const std::string& filename, const char* buffer, std::size_t size);
void write_buffer(const std::string& filename, const std::vector<char>& buffer); void write_buffer(const std::string& filename, const std::vector<char>& buffer);
......
File mode changed from 100644 to 100755
...@@ -169,6 +169,12 @@ void copy(Range&& r, Iterator it) ...@@ -169,6 +169,12 @@ void copy(Range&& r, Iterator it)
std::copy(r.begin(), r.end(), it); std::copy(r.begin(), r.end(), it);
} }
template <class Range, class Iterator, class F>
void transform(Range&& r, Iterator it, F f)
{
std::transform(r.begin(), r.end(), it, f);
}
template <class Range> template <class Range>
auto reverse(Range& r) auto reverse(Range& r)
{ {
......
...@@ -393,6 +393,31 @@ struct value ...@@ -393,6 +393,31 @@ struct value
return result; return result;
} }
template <class To>
To get(const std::string& pkey, const To& default_value) const
{
const auto* v = find(pkey);
if(v == this->end())
return default_value;
return v->to<To>();
}
template <class To>
std::vector<To> get(const std::string& pkey, const std::vector<To>& default_value) const
{
const auto* v = find(pkey);
if(v == this->end())
return default_value;
return v->to_vector<To>();
}
template <class To>
std::vector<To> get(const std::string& pkey,
const std::initializer_list<To>& default_value) const
{
return get<std::vector<To>>(pkey, default_value);
}
friend bool operator==(const value& x, const value& y); friend bool operator==(const value& x, const value& y);
friend bool operator!=(const value& x, const value& y); friend bool operator!=(const value& x, const value& y);
friend bool operator<(const value& x, const value& y); friend bool operator<(const value& x, const value& y);
......
...@@ -119,6 +119,7 @@ add_library(migraphx_gpu ...@@ -119,6 +119,7 @@ add_library(migraphx_gpu
code_object_op.cpp code_object_op.cpp
compile_hip.cpp compile_hip.cpp
compile_hip_code_object.cpp compile_hip_code_object.cpp
compile_pointwise.cpp
concat.cpp concat.cpp
convert.cpp convert.cpp
convolution.cpp convolution.cpp
...@@ -313,6 +314,8 @@ target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1) ...@@ -313,6 +314,8 @@ target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas) target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels) target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver)
rocm_install_targets( rocm_install_targets(
TARGETS migraphx_gpu migraphx_device TARGETS migraphx_gpu migraphx_device
INCLUDE INCLUDE
......
File mode changed from 100644 to 100755
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/compile_src.hpp> #include <migraphx/compile_src.hpp>
#include <migraphx/process.hpp> #include <migraphx/process.hpp>
#include <migraphx/env.hpp>
#include <cassert> #include <cassert>
namespace migraphx { namespace migraphx {
...@@ -21,6 +22,9 @@ bool is_hip_clang_compiler() ...@@ -21,6 +22,9 @@ bool is_hip_clang_compiler()
return result; return result;
} }
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
std::vector<std::vector<char>> std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch) compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
{ {
...@@ -41,9 +45,12 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std ...@@ -41,9 +45,12 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
{ {
params += " --cuda-gpu-arch=" + arch; params += " --cuda-gpu-arch=" + arch;
params += " --cuda-device-only"; params += " --cuda-device-only";
params += " -O3 "; params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
} }
if(enabled(MIGRAPHX_GPU_DEBUG{}))
params += " -DMIGRAPHX_DEBUG";
params += " -Wno-unused-command-line-argument -Wno-cuda-compat "; params += " -Wno-unused-command-line-argument -Wno-cuda-compat ";
params += MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_FLAGS); params += MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_FLAGS);
......
...@@ -82,9 +82,12 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option ...@@ -82,9 +82,12 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
}); });
srcs.push_back(src_file{fs::path{"main.cpp"}, srcs.push_back(src_file{fs::path{"main.cpp"},
std::make_pair(content.data(), content.data() + content.size())}); std::make_pair(content.data(), content.data() + content.size())});
auto args_hpp = generate_args_hpp(options.inputs); auto args_hpp =
generate_args_hpp(options.reduced_inputs.empty() ? options.inputs : options.reduced_inputs);
srcs.push_back(src_file{fs::path{"args.hpp"}, srcs.push_back(src_file{fs::path{"args.hpp"},
std::make_pair(args_hpp.data(), args_hpp.data() + args_hpp.size())}); std::make_pair(args_hpp.data(), args_hpp.data() + args_hpp.size())});
options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
options.params += " -I."; options.params += " -I.";
auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name()); auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name());
if(cos.size() != 1) if(cos.size() != 1)
......
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>
using namespace migraphx;
extern "C" {
__global__ void kernel(${params})
{
pointwise(${lambda}, ${args});
}
}
int main() {}
)__migraphx__";
std::string enum_params(std::size_t count, std::string param)
{
std::vector<std::string> items(count);
transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
return join_strings(items, ",");
}
std::size_t compute_global(std::size_t n, std::size_t local = 1024)
{
std::size_t groups = (n + local - 1) / local;
std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
return nglobal;
}
operation compile_pointwise(context&, const std::vector<shape>& inputs, const std::string& lambda)
{
hip_compile_options options;
options.global = compute_global(inputs.front().elements());
options.local = 1024;
options.inputs = inputs;
options.output = inputs.back();
options.reduced_inputs = reduce_dims(inputs);
auto src = interpolate_string(pointwise_kernel,
{{"params", enum_params(inputs.size(), "void * private_p")},
{"args", enum_params(inputs.size(), "private_p")},
{"lambda", lambda}});
return compile_hip_code_object(src, options);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
add_executable(gpu-driver
action.cpp
compile_pointwise.cpp
main.cpp
parser.cpp
perf.cpp
run_op.cpp
)
target_include_directories(gpu-driver PRIVATE include)
target_link_libraries(gpu-driver PRIVATE migraphx_gpu)
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/errors.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
auto& action_map()
{
static std::unordered_map<std::string, action_function> m;
return m;
}
action_function get_action(const std::string& name)
{
if(action_map().count(name) == 0)
MIGRAPHX_THROW("Missing action: " + name);
return action_map().at(name);
}
void register_action(const std::string& name, const action_function& a) { action_map()[name] = a; }
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
struct compile_pointwise : action<compile_pointwise>
{
static void apply(const parser& p, const value& v)
{
context ctx;
auto inputs = p.parse_shapes(v.at("inputs"));
auto op = gpu::compile_pointwise(ctx, inputs, v.at("lambda").to<std::string>());
double t = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms" << std::endl;
}
};
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
#include <migraphx/config.hpp>
#include <migraphx/auto_register.hpp>
#include <migraphx/type_name.hpp>
#include <migraphx/gpu/driver/parser.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
using action_function = std::function<void(const parser&, const value&)>;
action_function get_action(const std::string& name);
void register_action(const std::string& name, const action_function& a);
struct auto_register_action
{
template <class T>
static void apply()
{
auto name = get_type_name<T>();
register_action(name.substr(name.rfind("::") + 2),
[](auto&&... xs) { T::apply(std::forward<decltype(xs)>(xs)...); });
}
};
template <class T>
using action = auto_register<auto_register_action, T>;
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
#include <migraphx/value.hpp>
#include <migraphx/shape.hpp>
#include <unordered_map>
#include <functional>
#include <vector>
#include <string>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
[[noreturn]] void error(const std::string& msg);
struct parser
{
parser() = default;
template <class T>
T get(const value& v, const std::string& key, const T& default_value) const
{
return v.get(key, settings.get(key, default_value));
}
shape parse_shape(const value& v) const;
std::vector<shape> parse_shapes(const value& v) const;
void load_settings(const value& v);
static void process(const value& v);
private:
value settings = value::object{};
};
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#include <migraphx/gpu/driver/parser.hpp>
#include <migraphx/json.hpp>
#include <migraphx/convert_to_json.hpp>
#include <migraphx/file_buffer.hpp>
using namespace migraphx; // NOLINT
using namespace migraphx::gpu; // NOLINT
using namespace migraphx::gpu::driver; // NOLINT
int main(int argc, char const* argv[])
{
std::vector<std::string> args(argv, argv + argc);
if(args.size() < 2)
{
std::cout << "Usage: gpu-driver <input-file>" << std::endl;
std::abort();
}
auto v = from_json_string(convert_to_json(read_string(args[1])));
parser::process(v);
}
#include <migraphx/gpu/driver/parser.hpp>
#include <migraphx/gpu/driver/action.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
[[noreturn]] void error(const std::string& msg)
{
std::cout << msg << std::endl;
std::abort();
}
shape parser::parse_shape(const value& v) const
{
auto lens = get(v, "lens", std::vector<std::size_t>{});
auto strides = get(v, "strides", std::vector<std::size_t>{});
auto type = shape::parse_type(get<std::string>(v, "type", "float"));
if(strides.empty())
return shape{type, lens};
else
return shape{type, lens, strides};
}
std::vector<shape> parser::parse_shapes(const value& v) const
{
std::vector<shape> result;
std::transform(
v.begin(), v.end(), std::back_inserter(result), [&](auto&& x) { return parse_shape(x); });
return result;
}
void parser::load_settings(const value& v)
{
if(v.contains("settings"))
settings = v.at("settings");
}
void parser::process(const value& v)
{
if(not v.is_object())
error("Input is not an object");
parser p{};
p.load_settings(v);
for(auto&& pp : v)
{
if(pp.get_key() == "settings")
continue;
get_action(pp.get_key())(p, pp.without_key());
}
}
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/driver/perf.hpp>
#include <migraphx/context.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/time.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {
std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsigned long seed = 0)
{
std::vector<argument> args;
std::transform(shapes.begin(), shapes.end(), std::back_inserter(args), [&](auto& s) {
return to_gpu(generate_argument(s, seed++));
});
return args;
}
using milliseconds = std::chrono::duration<double, std::milli>;
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n)
{
// TODO: Use std::ref
migraphx::context gctx = ctx;
auto output = op.compute_shape(inputs);
op.finalize(gctx, output, inputs);
auto args = generate_arguments(inputs);
auto run = [&] {
op.compute(gctx, output, args);
gctx.finish();
};
run();
auto r = range(n);
double t = std::accumulate(
r.begin(), r.end(), double{0.0}, [&](auto x, auto) { return x + time<milliseconds>(run); });
return t / n;
}
} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment