Commit 800f0ab3 authored by Chao Liu
Browse files

update hello world example

parent a0addb61
......@@ -14,11 +14,13 @@
// program
struct HelloWorld
{
__host__ __device__ void operator()(TileProgram& tp, int x, int y)
__host__ __device__ void operator()(TileProgram& tp, int x, int y, int* res)
{
auto desc = tp.make_naive_tensor_descriptor_packed(ck::make_tuple(x));
auto desc0 = tp(make_naive_tensor_descriptor_packed(ck::make_tuple(x)));
auto desc1 = tp(make_naive_tensor_descriptor_packed(ck::make_tuple(y)));
printf("length %d\n", desc.GetLength(ck::Number<0>{}));
res[0] = desc0.GetLength(ck::Number<0>{});
res[1] = desc1.GetLength(ck::Number<0>{});
}
};
......@@ -27,7 +29,16 @@ int main()
int x = 100;
int y = 101;
launch(HelloWorld{}, 1, 1, x, y);
DeviceMem res_dev_buf(2 * sizeof(int));
launch(HelloWorld{}, 1, 1, x, y, static_cast<int*>(res_dev_buf.GetDeviceBuffer()));
int res_host[2];
res_dev_buf.FromDevice(res_host);
printf("res_host %d\n", res_host[0]);
printf("res_host %d\n", res_host[1]);
return 0;
}
......@@ -6,66 +6,71 @@
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
// namespace tp (for tile programming)
struct TileProgram
// hidden intermediate argument
struct Arg
{
// hidden intermediate argument
struct Arg
char data_[128];
ck::index_t size_ = 0;
ck::index_t pos_ = 0;
__host__ __device__ void reset()
{
char data_[1024];
ck::index_t size_ = 0;
};
size_ = 0;
pos_ = 0;
}
// arg on device
Arg arg_;
ck::index_t arg_pos_ = 0;
__device__ void reset_pos() { pos_ = 0; }
// push arg on host
template <typename T>
__host__ auto push_arg(const T& a)
__host__ T push(const T& a)
{
*reinterpret_cast<T*>(arg_.data_ + arg_.size_) = a;
*reinterpret_cast<T*>(data_ + size_) = a;
arg_.size_ += sizeof(T);
size_ += sizeof(T);
return a;
}
// pull arg on device
template <typename T>
__device__ T pull_arg()
__device__ T pull()
{
auto a = *reinterpret_cast<T*>(arg_.data_ + arg_pos_);
T a = *reinterpret_cast<T*>(data_ + pos_);
arg_pos_ += sizeof(T);
pos_ += sizeof(T);
return a;
}
};
// host push
template <typename... Lengths>
__host__ constexpr auto
make_naive_tensor_descriptor_packed(const ck::Tuple<Lengths...>& lengths)
{
auto desc = ck::make_naive_tensor_descriptor_packed(lengths);
// namespace tp (for tile programming)
struct TileProgram
{
// arg on device
Arg arg_;
return push_arg(desc);
}
__device__ void gpu_init() { arg_.reset_pos(); }
// device pull
template <typename... Lengths>
__device__ constexpr auto
make_naive_tensor_descriptor_packed(const ck::Tuple<Lengths...>& lengths)
// push arg on host
template <typename T>
__host__ T operator()(const T& a)
{
using Desc = decltype(ck::make_naive_tensor_descriptor_packed(lengths));
return arg_.push(a);
}
return pull_arg<Desc>();
// pull arg on device
template <typename T>
__device__ T operator()(const T&)
{
return arg_.pull<T>();
}
};
// Kernel entry point that runs a tile program on the device.
//   f  : program functor, invoked as f(tp, xs...)
//   tp : TileProgram received by value as a kernel parameter
//   xs : additional arguments forwarded unchanged to the program
template <typename Program, typename... Xs>
__global__ void gpu_program_wrapper(Program f, TileProgram tp, Xs... xs)
{
// rewind the argument read position (gpu_init() -> arg_.reset_pos() sets pos_ to 0)
// so that pulls inside f start at the beginning of the argument buffer
tp.gpu_init();
f(tp, xs...);
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment