Commit e63c09c5 authored by Paul

Add initial contiguous pass

parent 39151d27
......@@ -70,7 +70,35 @@ struct check_shapes
const check_shapes& same_ndims() const
{
if(!this->same([](const shape& s) { return s.lens().size(); }))
MIGRAPH_THROW(prefix() + "Dimensions do not match");
MIGRAPH_THROW(prefix() + "Number of dimensions do not match");
return *this;
}
const check_shapes& standard() const
{
if(!this->all_of([](const shape& s) { return s.standard(); }))
MIGRAPH_THROW(prefix() + "Shapes are not in standard layout");
return *this;
}
const check_shapes& packed() const
{
if(!this->all_of([](const shape& s) { return s.packed(); }))
MIGRAPH_THROW(prefix() + "Shapes are not packed");
return *this;
}
const check_shapes& not_transposed() const
{
if(!this->all_of([](const shape& s) { return not s.transposed(); }))
MIGRAPH_THROW(prefix() + "Shapes are transposed");
return *this;
}
const check_shapes& not_broadcasted() const
{
// if(!this->all_of([](const shape& s) { return not s.broadcasted(); }))
// MIGRAPH_THROW(prefix() + "Shapes are broadcasted");
return *this;
}
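// Usage sketch (based on the call sites added in the GPU operators below,
// not a new API): the validators are intended to be chained, e.g.
//   check_shapes{inputs, *this}.has(3).standard();
//   check_shapes{inputs, *this}.has(2).not_broadcasted();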
......
......@@ -38,6 +38,8 @@ struct instruction
result = r;
for(auto&& ins : output)
{
assert(ins->op.name().front() != '@');
std::cout << ins->op.name() << std::endl;
ins->replace(compute_shape(ins->op, ins->arguments));
}
}
......@@ -122,6 +124,7 @@ inline void backreference(instruction_ref ref)
// TODO: Use const ref for vector
inline shape compute_shape(operation op, std::vector<instruction_ref> args)
{
std::cout << "compute_shape: " << op.name() << std::endl;
std::vector<shape> shapes(args.size());
std::transform(
args.begin(), args.end(), shapes.begin(), [](instruction_ref i) { return i->result; });
......
......@@ -80,11 +80,13 @@ struct shape
/// Returns true if the shape is packed with no padding
bool packed() const;
/// Returns true if the shape has been transposed. That is, the strides are not in descending order
/// Returns true if the shape has been transposed. That is, the strides are not in descending
/// order
bool transposed() const;
/// Returns true if the shape is broadcasting a dimension. That is, one of the strides is zero
bool broadcasted() const;
/// Returns true if the shape is in its standard format. That is, the shape is both packed and not transposed.
/// Returns true if the shape is in its standard format. That is, the shape is both packed and
/// not transposed.
bool standard() const;
friend bool operator==(const shape& x, const shape& y);
......
......@@ -32,6 +32,7 @@ program::insert_instruction(instruction_ref ins, operation op, std::vector<instr
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an exisiting instruction");
assert(not starts_with(op.name(), "@"));
// TODO: Use move
shape r = compute_shape(op, args);
auto result = impl->instructions.insert(ins, {op, r, args});
......@@ -46,6 +47,7 @@ program::replace_instruction(instruction_ref ins, operation op, std::vector<inst
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an exisiting instruction");
assert(not starts_with(op.name(), "@"));
shape r = compute_shape(op, args);
ins->replace(op, r, args);
......
......@@ -5,16 +5,42 @@
namespace migraph {
namespace gpu {
struct index
{
std::size_t global;
std::size_t local;
std::size_t group;
};
template<class F>
__global__ void launcher(F f)
{
index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
f(idx);
}
auto launch(std::size_t global, std::size_t local)
{
// Capture by value: the returned closure may outlive launch's parameters.
return [=](auto f) {
assert(local > 0);
assert(global > 0);
using f_type = decltype(f);
dim3 nblocks(global / local);
dim3 nthreads(local);
hipLaunchKernelGGL((launcher<f_type>),
nblocks,
nthreads,
0,
nullptr,
f);
};
}
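// Usage sketch (mirrors the grid-stride loop in hip_contiguous below; nglobal
// and nlocal are placeholders): global should be a multiple of local, since
// the block count is computed as global / local.
//   launch(nglobal, nlocal)([=](auto idx) {
//       for(std::size_t i = idx.global; i < n; i += nglobal) { /* ... */ }
//   });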
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
switch(n)
{
case 0:
{
f(std::integral_constant<std::size_t, 0>{});
break;
}
case 1:
{
f(std::integral_constant<std::size_t, 1>{});
......@@ -86,48 +112,43 @@ struct hip_tensor_descriptor
size_t strides[NDim] = {};
};
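// Note (the descriptor's multi()/linear() members sit outside this hunk and
// are assumed from their use below): at_desc.multi(i) converts a linear index
// in the packed output layout into a multi-index, and a_desc.linear(...) maps
// that multi-index to the element's offset in the possibly strided input.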
template <typename T, size_t NDim>
__global__ void contiguous_gpu(const T* a,
hip_tensor_descriptor<NDim> a_desc,
T* at,
hip_tensor_descriptor<NDim> at_desc,
size_t nelements)
{
for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
i += blockDim.x * gridDim.x)
{
hip_index<NDim> s = at_desc.multi(i);
size_t lidx = a_desc.linear(s);
at[i] = a[lidx];
}
}
// template <typename T, size_t NDim>
// __global__ void contiguous_gpu(const T* a,
// hip_tensor_descriptor<NDim> a_desc,
// T* at,
// hip_tensor_descriptor<NDim> at_desc,
// size_t nelements)
// {
// for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
// i += blockDim.x * gridDim.x)
// {
// hip_index<NDim> s = at_desc.multi(i);
// size_t lidx = a_desc.linear(s);
// at[i] = a[lidx];
// }
// }
void hip_contiguous(migraph::shape output_shape, migraph::argument arg, migraph::argument result)
{
size_t ndim = output_shape.lens().size();
visit_all(result, arg)([&](auto output, auto input) {
if(ndim == 4)
{
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto& s = arg.get_shape();
hip_tensor_descriptor<4> a_desc(s.lens(), s.strides());
hip_tensor_descriptor<4> at_desc(output_shape.lens(), output_shape.strides());
dim3 nblocks(512);
dim3 nthreads(512);
hipLaunchKernelGGL((contiguous_gpu<int, 4>),
nblocks,
nthreads,
0,
nullptr,
input.data(),
a_desc,
output.data(),
at_desc,
s.elements());
}
else
{
MIGRAPH_THROW("contiguous is only valid for 4D tensors");
}
hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
auto* a = input.data();
auto* at = output.data();
auto nelements = s.elements();
std::size_t nlocal = 512;
std::size_t nglobal = 512*nlocal;
launch(nglobal, nlocal)([=](auto idx) mutable {
for(size_t i = idx.global; i < nelements; i += nglobal)
{
size_t lidx = a_desc.linear(at_desc.multi(i));
at[i] = a[lidx];
}
});
});
});
}
} // namespace gpu
......
......@@ -23,7 +23,7 @@ struct miopen_convolution
std::string name() const { return "gpu::convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
check_shapes{inputs, *this}.has(3).standard();
return op.compute_shape({inputs.at(0), inputs.at(1)});
}
argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
......@@ -74,7 +74,7 @@ struct miopen_pooling
std::string name() const { return "gpu::pooling"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
check_shapes{inputs, *this}.has(2).standard();
return op.compute_shape({inputs.at(1)});
}
argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
......@@ -105,7 +105,7 @@ struct miopen_add
std::string name() const { return "gpu::add"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
check_shapes{inputs, *this}.has(3).not_broadcasted();
return inputs.at(0);
}
......@@ -207,7 +207,7 @@ struct miopen_relu
std::string name() const { return "gpu::relu"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
......
......@@ -3,13 +3,22 @@
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/check_context.hpp>
#include <migraph/auto_contiguous.hpp>
namespace migraph {
namespace gpu {
std::vector<pass> target::get_passes(migraph::context&) const
{
return {lowering{}, write_literals{}, check_context<context>{}};
// clang-format off
return
{
auto_contiguous{},
lowering{},
write_literals{},
check_context<context>{}
};
// clang-format on
}
std::string target::name() const { return "miopen"; }
......
......@@ -51,8 +51,41 @@ void after_literal_broadcast()
EXPECT(not p.get_shape().broadcasted());
}
void after_param_transpose()
{
migraph::program p;
auto l = p.add_parameter("2x2", {migraph::shape::float_type, {2, 2}});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().transposed());
auto t = p.add_instruction(migraph::transpose{{1, 0}}, l);
p.add_instruction(pass_op{}, t);
EXPECT(not p.get_shape().standard());
EXPECT(p.get_shape().transposed());
p.compile(contiguous_target{});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().transposed());
}
void after_param_broadcast()
{
migraph::program p;
auto l1 = p.add_parameter("2x2", {migraph::shape::float_type, {2, 2}});
auto l2 = p.add_parameter("2", {migraph::shape::float_type, {2}});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().broadcasted());
auto b = p.add_instruction(migraph::broadcast{}, l1, l2);
p.add_instruction(pass_op{}, b);
EXPECT(not p.get_shape().standard());
EXPECT(p.get_shape().broadcasted());
p.compile(contiguous_target{});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().broadcasted());
}
int main()
{
after_literal_transpose();
after_literal_broadcast();
after_param_transpose();
after_param_broadcast();
}
......@@ -14,6 +14,28 @@
#include "test.hpp"
#include "verify.hpp"
struct auto_eval
{
migraph::program* p;
migraph::program::parameter_map* m;
migraph::argument result;
auto_eval(migraph::program& pp, migraph::program::parameter_map& pm)
: p(&pp), m(&pm)
{}
migraph::argument operator()() const
{
return p->eval(*m);
}
~auto_eval()
{
if(std::uncaught_exception())
std::cout << *p << std::endl;
}
};
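// Usage sketch: wrap the call to eval so the program is printed when
// evaluation throws, e.g. `return auto_eval(p, m)();` as in run_cpu/run_gpu
// below.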
template <class V>
migraph::argument run_cpu()
{
......@@ -25,7 +47,7 @@ migraph::argument run_cpu()
{
m[x.first] = migraph::generate_argument(x.second);
}
return p.eval(m);
return auto_eval(p, m)();
}
template <class V>
......@@ -41,7 +63,7 @@ migraph::argument run_gpu()
m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
}
return migraph::gpu::from_gpu(p.eval(m));
return migraph::gpu::from_gpu(auto_eval(p, m)());
}
template <class V>
......