Commit 8143e4fb authored by wsttiger

Merge branch 'master' into remove_concat

parents 0a4583b7 9ca0fbf1
@@ -26,7 +26,7 @@ argument miopen_gemm::compute(context& ctx,
     rocblas_int m = output_shape.lens()[0];
     rocblas_int n = output_shape.lens()[1];
     rocblas_int k = args[0].get_shape().lens()[1];
-    rocblas_sgemm(ctx.rbhandle.get(),
+    rocblas_sgemm(ctx.get_stream().get_rocblas(),
                   transb ? rocblas_operation_transpose : rocblas_operation_none,
                   transa ? rocblas_operation_transpose : rocblas_operation_none,
                   n,
...
@@ -38,14 +38,6 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
     return hip_ptr{result};
 }
 
-template <class T>
-hip_ptr write_to_gpu(const T& x)
-{
-    using type = typename T::value_type;
-    auto size = x.size() * sizeof(type);
-    return write_to_gpu(x.data(), size);
-}
-
 template <class T>
 std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 {
@@ -65,6 +57,14 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
     return result;
 }
 
+template <class T>
+hip_ptr write_to_gpu(const T& x)
+{
+    using type = typename T::value_type;
+    auto size = x.size() * sizeof(type);
+    return write_to_gpu(x.data(), size);
+}
+
 argument allocate_gpu(const shape& s, bool host)
 {
     auto p = share(allocate_gpu(s.bytes() + 1, host));
@@ -88,6 +88,13 @@ argument from_gpu(argument arg)
     return result;
 }
 
+void set_device(std::size_t id)
+{
+    auto status = hipSetDevice(id);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Error setting device");
+}
+
 void gpu_sync() { hipDeviceSynchronize(); }
 
 void copy_to_gpu(argument src, argument dst)
...
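For orientation: write_to_gpu(const T&) simply forwards to the write_to_gpu(const void*, std::size_t) overload and is now defined after it, and set_device is a checked wrapper around hipSetDevice. Inside this translation unit the two helpers would be exercised roughly like the sketch below; the helper name, buffer contents, and device id are made up for illustration.

// Hypothetical helper, illustration only; not part of this diff.
void example_upload()
{
    std::vector<float> host_data(16, 1.0f);       // made-up host buffer
    set_device(0);                                // throws "Error setting device" if hipSetDevice fails
    hip_ptr gpu_buffer = write_to_gpu(host_data); // copies host_data.size() * sizeof(float) bytes to the GPU
}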
@@ -4,17 +4,114 @@
 #include <migraph/gpu/miopen.hpp>
 #include <migraph/gpu/rocblas.hpp>
 #include <migraph/gpu/hip.hpp>
+#include <migraph/env.hpp>
 
 namespace migraph {
 namespace gpu {
 
+MIGRAPH_DECLARE_ENV_VAR(MIGRAPH_DISABLE_NULL_STREAM)
+
+struct hip_device
+{
+    hip_device() { add_stream(); }
+    hip_device(std::size_t id) : device_id(id) { add_stream(); }
+
+    struct stream
+    {
+        using hip_stream_ptr = MIGRAPH_MANAGE_PTR(hipStream_t, hipStreamDestroy);
+
+        stream() {}
+        stream(std::size_t device_number) : id(device_number) {}
+
+        void setup() { set_device(id); }
+
+        static hip_stream_ptr create_stream()
+        {
+            hipStream_t result = nullptr;
+            auto status = hipStreamCreate(&result);
+            if(status != hipSuccess)
+                MIGRAPH_THROW("Failed to allocate stream");
+            return hip_stream_ptr{result};
+        }
+
+        hipStream_t get()
+        {
+            if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
+            {
+                setup();
+                if(s == nullptr)
+                    s = create_stream();
+                assert(s.get() != nullptr);
+                return s.get();
+            }
+            return nullptr;
+        }
+
+        auto create_miopen_handle()
+        {
+            if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
+                return make_obj<miopen_handle>(&miopenCreateWithStream, get());
+            else
+                return make_obj<miopen_handle>(&miopenCreate);
+        }
+
+        auto get_miopen()
+        {
+            setup();
+            if(mihandle == nullptr)
+                mihandle = create_miopen_handle();
+            assert(mihandle.get() != nullptr);
+            return mihandle.get();
+        }
+
+        auto get_rocblas()
+        {
+            setup();
+            if(rbhandle == nullptr)
+                rbhandle = create_rocblas_handle_ptr(get());
+            assert(rbhandle.get() != nullptr);
+            return rbhandle.get();
+        }
+
+        private:
+        std::size_t id = 0;
+        shared<hip_stream_ptr> s = nullptr;
+        shared<miopen_handle> mihandle = nullptr;
+        shared<rocblas_handle_ptr> rbhandle = nullptr;
+    };
+
+    void add_stream() { streams.emplace_back(device_id); }
+
+    stream& get_stream() { return streams.at(current_stream); }
+
+    void set_stream(std::size_t n) { current_stream = n; }
+
+    private:
+    std::size_t device_id = 0;
+    std::size_t current_stream = 0;
+    std::vector<stream> streams;
+};
+
 struct context
 {
-    shared<miopen_handle> handle;
-    shared<rocblas_handle_ptr> rbhandle;
-    argument scratch;
+    context(std::size_t n = 0) : current_device(std::make_shared<hip_device>(n)) {}
+
+    hip_device& get_current_device()
+    {
+        assert(current_device != nullptr);
+        return *current_device;
+    }
+
+    hip_device::stream& get_stream() { return get_current_device().get_stream(); }
+
     std::vector<argument> literals{};
     void finish() const { gpu_sync(); }
+
+    private:
+    // TODO: Make this a vector to support multiple devices
+    std::shared_ptr<hip_device> current_device;
 };
 
 } // namespace gpu
 } // namespace migraph
...
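The context no longer exposes handle, rbhandle, or scratch; operators now reach MIOpen, rocBLAS, and the HIP stream through get_stream(), and a real stream is created lazily only when MIGRAPH_DISABLE_NULL_STREAM is enabled. A hedged sketch of the call-site pattern this enables follows; the helper name is a placeholder, and the actual migrated call sites are in the .cpp hunks further down.

#include <migraph/gpu/context.hpp>

// Placeholder helper, illustration only.
void use_context_handles(migraph::gpu::context& ctx)
{
    auto miopen_h  = ctx.get_stream().get_miopen();  // previously ctx.handle.get()
    auto rocblas_h = ctx.get_stream().get_rocblas(); // previously ctx.rbhandle.get()
    hipStream_t hs = ctx.get_stream().get();         // nullptr (null stream) unless MIGRAPH_DISABLE_NULL_STREAM is set
    (void)miopen_h;
    (void)rocblas_h;
    (void)hs;
}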
@@ -3,14 +3,19 @@
 #define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
 
 #include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-void add(const argument& result, const argument& arg1, const argument& arg2);
+void add(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
 
-void add(const argument& result, const argument& arg1, const argument& arg2, const argument& arg3);
+void add(hipStream_t stream,
+         const argument& result,
+         const argument& arg1,
+         const argument& arg2,
+         const argument& arg3);
 
 } // namespace device
 } // namespace gpu
...
@@ -3,14 +3,19 @@
 #define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
 
 #include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-void add_relu(const argument& result, const argument& arg1, const argument& arg2);
+void add_relu(hipStream_t stream,
+              const argument& result,
+              const argument& arg1,
+              const argument& arg2);
 
-void add_relu(const argument& result,
+void add_relu(hipStream_t stream,
+              const argument& result,
               const argument& arg1,
               const argument& arg2,
               const argument& arg3);
...
 #ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_CONCAT_HPP
 #define MIGRAPH_GUARD_RTGLIB_DEVICE_CONCAT_HPP
 
+#include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-argument
-concat(const shape& output_shape, std::vector<argument> args, std::vector<std::size_t> offsets);
+argument concat(hipStream_t stream,
+                const shape& output_shape,
+                std::vector<argument> args,
+                std::vector<std::size_t> offsets);
 
 } // namespace device
 } // namespace gpu
...
@@ -2,12 +2,13 @@
 #define MIGRAPH_GUARD_MIGRAPHLIB_KERNELS_HPP
 
 #include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-void contiguous(argument result, argument arg);
+void contiguous(hipStream_t stream, argument result, argument arg);
 
 } // namespace device
 } // namespace gpu
...
+#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_MUL_HPP
+#define MIGRAPH_GUARD_RTGLIB_DEVICE_MUL_HPP
+
+#include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
+
+namespace migraph {
+namespace gpu {
+namespace device {
+
+void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
+
+void mul(hipStream_t stream,
+         const argument& result,
+         const argument& arg1,
+         const argument& arg2,
+         const argument& arg3);
+
+} // namespace device
+} // namespace gpu
+} // namespace migraph
+#endif
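Every device:: entry point in this diff now takes the stream as its first argument, so the kernel behind it can be launched on that stream instead of the default null stream. A minimal illustrative kernel is sketched below; it is not the repository's actual device::mul implementation, and the names mul_kernel and launch_mul are placeholders.

#include <cstddef>
#include <hip/hip_runtime.h>

// Illustration only: an element-wise multiply that enqueues its work on the stream it is handed.
__global__ void mul_kernel(const float* a, const float* b, float* out, std::size_t n)
{
    std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
    if(i < n)
        out[i] = a[i] * b[i];
}

void launch_mul(hipStream_t stream, const float* a, const float* b, float* out, std::size_t n)
{
    const unsigned int block = 256;
    const unsigned int grid  = static_cast<unsigned int>((n + block - 1) / block);
    // A nullptr stream here is the HIP null stream, matching stream::get() when
    // MIGRAPH_DISABLE_NULL_STREAM is not enabled.
    hipLaunchKernelGGL(mul_kernel, dim3(grid), dim3(block), 0, stream, a, b, out, n);
}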
@@ -22,7 +22,7 @@ namespace gpu {
 
 struct miopen_gemm
 {
-    op::gemm op;
+    op::dot op;
     std::string name() const { return "gpu::gemm"; }
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument
...
@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false);
 
 migraph::argument from_gpu(migraph::argument arg);
 
+void set_device(std::size_t id);
+
 void gpu_sync();
 
 void copy_to_gpu(argument src, argument dst);
...
@@ -41,6 +41,8 @@ inline tensor_descriptor make_tensor(const migraph::shape& s)
     miopenDataType_t d;
     if(s.type() == shape::float_type)
         d = miopenFloat;
+    else if(s.type() == shape::half_type)
+        d = miopenHalf;
     else
         MIGRAPH_THROW("Unsupported type");
     miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
...
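make_tensor previously threw for anything other than float_type; with this change a half-precision shape maps to miopenHalf. A small sketch, with arbitrary dimensions and a placeholder function name:

#include <migraph/shape.hpp>
#include <migraph/gpu/miopen.hpp>

// Sketch: builds a MIOpen tensor descriptor for an fp16 shape, which this diff now permits.
auto make_fp16_descriptor()
{
    migraph::shape fp16{migraph::shape::half_type, {1, 3, 224, 224}}; // arbitrary NCHW dimensions
    return migraph::gpu::make_tensor(fp16);                           // previously only float_type was accepted
}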
+#ifndef MIGRAPH_GUARD_RTGLIB_MUL_HPP
+#define MIGRAPH_GUARD_RTGLIB_MUL_HPP
+
+#include <migraph/gpu/lowering.hpp>
+#include <migraph/manage_ptr.hpp>
+#include <migraph/instruction.hpp>
+#include <migraph/operators.hpp>
+#include <migraph/generate.hpp>
+#include <migraph/shape_for_each.hpp>
+#include <migraph/gpu/miopen.hpp>
+#include <migraph/gpu/hip.hpp>
+#include <migraph/dfor.hpp>
+#include <migraph/gpu/device/contiguous.hpp>
+#include <migraph/gpu/device/mul.hpp>
+#include <migraph/iterator_for.hpp>
+#include <migraph/gpu/rocblas.hpp>
+#include <migraph/gpu/context.hpp>
+#include <utility>
+
+namespace migraph {
+namespace gpu {
+
+struct hip_mul
+{
+    std::string name() const { return "gpu::mul"; }
+    shape compute_shape(const std::vector<shape>& inputs) const;
+    argument compute(context&, const shape&, const std::vector<argument>& args) const;
+};
+
+} // namespace gpu
+} // namespace migraph
+#endif
@@ -11,6 +11,7 @@ namespace gpu {
 using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
 
 rocblas_handle_ptr create_rocblas_handle_ptr();
+rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
 
 } // namespace gpu
...
@@ -20,7 +20,7 @@ argument miopen_leaky_relu::compute(context& ctx,
     float alpha = 1, beta = 0;
     auto x_desc = make_tensor(args[0].get_shape());
     auto y_desc = make_tensor(output_shape);
-    miopenActivationForward(ctx.handle.get(),
+    miopenActivationForward(ctx.get_stream().get_miopen(),
                             ad.get(),
                             &alpha,
                             x_desc.get(),
...
@@ -19,6 +19,7 @@
 #include <migraph/gpu/leaky_relu.hpp>
 #include <migraph/gpu/softmax.hpp>
 #include <migraph/gpu/add.hpp>
+#include <migraph/gpu/mul.hpp>
 #include <migraph/gpu/batchnorm.hpp>
 #include <migraph/gpu/pooling.hpp>
 #include <migraph/gpu/gemm.hpp>
@@ -65,7 +66,11 @@ struct miopen_apply
             {
                 check_shape(s, apply_add(it));
             }
-            else if(it->name() == "gemm")
+            else if(it->name() == "mul")
+            {
+                check_shape(s, apply_mul(it));
+            }
+            else if(it->name() == "dot")
             {
                 check_shape(s, apply_gemm(it));
             }
@@ -163,9 +168,16 @@ struct miopen_apply
             ins, hip_add{}, ins->inputs().at(0), ins->inputs().at(1), output);
     }
 
+    instruction_ref apply_mul(instruction_ref ins)
+    {
+        auto output = insert_allocation(ins, ins->get_shape());
+        return prog->replace_instruction(
+            ins, hip_mul{}, ins->inputs().at(0), ins->inputs().at(1), output);
+    }
+
     instruction_ref apply_gemm(instruction_ref ins)
     {
-        auto&& op = any_cast<op::gemm>(ins->get_operator());
+        auto&& op = any_cast<op::dot>(ins->get_operator());
         auto output = insert_allocation(ins, ins->get_shape());
         return prog->replace_instruction(
             ins, miopen_gemm{op}, ins->inputs().at(0), ins->inputs().at(1), output);
...
+#include <migraph/gpu/mul.hpp>
+#include <migraph/operators.hpp>
+#include <migraph/manage_ptr.hpp>
+#include <migraph/gpu/miopen.hpp>
+#include <utility>
+
+namespace migraph {
+namespace gpu {
+
+shape hip_mul::compute_shape(const std::vector<shape>& inputs) const
+{
+    // check_shapes{inputs, *this}.has(3).standard();
+    check_shapes{inputs, *this}.has(3);
+    return inputs.at(0);
+}
+
+argument hip_mul::compute(context& ctx, const shape&, const std::vector<argument>& args) const
+{
+    device::mul(ctx.get_stream().get(), args[2], args[0], args[1]);
+    return args[2];
+}
+
+} // namespace gpu
+} // namespace migraph
@@ -21,7 +21,7 @@ argument miopen_pooling::compute(context& ctx,
     float alpha = 1, beta = 0;
-    miopenPoolingForward(ctx.handle.get(),
+    miopenPoolingForward(ctx.get_stream().get_miopen(),
                          pd.get(),
                          &alpha,
                          x_desc.get(),
...
@@ -20,7 +20,7 @@ argument miopen_relu::compute(context& ctx,
     float alpha = 1, beta = 0;
     auto x_desc = make_tensor(args[0].get_shape());
     auto y_desc = make_tensor(output_shape);
-    miopenActivationForward(ctx.handle.get(),
+    miopenActivationForward(ctx.get_stream().get_miopen(),
                             ad.get(),
                             &alpha,
                             x_desc.get(),
...
@@ -10,6 +10,13 @@ rocblas_handle_ptr create_rocblas_handle_ptr()
     return rocblas_handle_ptr{handle};
 }
 
+rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
+{
+    rocblas_handle_ptr rb = create_rocblas_handle_ptr();
+    rocblas_set_stream(rb.get(), s);
+    return rb;
+}
+
 } // namespace gpu
 } // namespace migraph
@@ -20,7 +20,7 @@ argument miopen_softmax::compute(context& ctx,
     float alpha = 1, beta = 0;
     auto x_desc = make_tensor(args[0].get_shape());
     auto y_desc = make_tensor(output_shape);
-    miopenSoftmaxForward(ctx.handle.get(),
+    miopenSoftmaxForward(ctx.get_stream().get_miopen(),
                          &alpha,
                          x_desc.get(),
                          args[0].implicit(),
...