"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "38376959a508d0b81094a0c6ea62afd98c9122eb"
Commit 800f0ab3 authored by Chao Liu's avatar Chao Liu
Browse files

update hello world example

parent a0addb61
...@@ -14,11 +14,13 @@ ...@@ -14,11 +14,13 @@
// program // program
struct HelloWorld struct HelloWorld
{ {
__host__ __device__ void operator()(TileProgram& tp, int x, int y) __host__ __device__ void operator()(TileProgram& tp, int x, int y, int* res)
{ {
auto desc = tp.make_naive_tensor_descriptor_packed(ck::make_tuple(x)); auto desc0 = tp(make_naive_tensor_descriptor_packed(ck::make_tuple(x)));
auto desc1 = tp(make_naive_tensor_descriptor_packed(ck::make_tuple(y)));
printf("length %d\n", desc.GetLength(ck::Number<0>{})); res[0] = desc0.GetLength(ck::Number<0>{});
res[1] = desc1.GetLength(ck::Number<0>{});
} }
}; };
...@@ -27,7 +29,16 @@ int main() ...@@ -27,7 +29,16 @@ int main()
int x = 100; int x = 100;
int y = 101; int y = 101;
launch(HelloWorld{}, 1, 1, x, y); DeviceMem res_dev_buf(2 * sizeof(int));
launch(HelloWorld{}, 1, 1, x, y, static_cast<int*>(res_dev_buf.GetDeviceBuffer()));
int res_host[2];
res_dev_buf.FromDevice(res_host);
printf("res_host %d\n", res_host[0]);
printf("res_host %d\n", res_host[1]);
return 0; return 0;
} }
...@@ -6,66 +6,71 @@ ...@@ -6,66 +6,71 @@
#include "ck/tensor_description/tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
// namespace tp (for tile programming) // hidden intermediate argument
struct TileProgram struct Arg
{ {
// hidden intermediate argument char data_[128];
struct Arg ck::index_t size_ = 0;
ck::index_t pos_ = 0;
__host__ __device__ void reset()
{ {
char data_[1024]; size_ = 0;
ck::index_t size_ = 0; pos_ = 0;
}; }
// arg on device __device__ void reset_pos() { pos_ = 0; }
Arg arg_;
ck::index_t arg_pos_ = 0;
// push arg on host // push arg on host
template <typename T> template <typename T>
__host__ auto push_arg(const T& a) __host__ T push(const T& a)
{ {
*reinterpret_cast<T*>(arg_.data_ + arg_.size_) = a; *reinterpret_cast<T*>(data_ + size_) = a;
arg_.size_ += sizeof(T); size_ += sizeof(T);
return a; return a;
} }
// pull arg on device // pull arg on device
template <typename T> template <typename T>
__device__ T pull_arg() __device__ T pull()
{ {
auto a = *reinterpret_cast<T*>(arg_.data_ + arg_pos_); T a = *reinterpret_cast<T*>(data_ + pos_);
arg_pos_ += sizeof(T); pos_ += sizeof(T);
return a; return a;
} }
};
// host push // namespace tp (for tile programming)
template <typename... Lengths> struct TileProgram
__host__ constexpr auto {
make_naive_tensor_descriptor_packed(const ck::Tuple<Lengths...>& lengths) // arg on device
{ Arg arg_;
auto desc = ck::make_naive_tensor_descriptor_packed(lengths);
return push_arg(desc); __device__ void gpu_init() { arg_.reset_pos(); }
}
// device pull // push arg on host
template <typename... Lengths> template <typename T>
__device__ constexpr auto __host__ T operator()(const T& a)
make_naive_tensor_descriptor_packed(const ck::Tuple<Lengths...>& lengths)
{ {
using Desc = decltype(ck::make_naive_tensor_descriptor_packed(lengths)); return arg_.push(a);
}
return pull_arg<Desc>(); // pull arg on device
template <typename T>
__device__ T operator()(const T&)
{
return arg_.pull<T>();
} }
}; };
template <typename Program, typename... Xs> template <typename Program, typename... Xs>
__global__ void gpu_program_wrapper(Program f, TileProgram tp, Xs... xs) __global__ void gpu_program_wrapper(Program f, TileProgram tp, Xs... xs)
{ {
tp.gpu_init();
f(tp, xs...); f(tp, xs...);
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment