Commit 92e1588d authored by Qianfeng Zhang
Browse files

Add CONSTANT decorator for descriptor read buffer

parent f0019df3
...@@ -277,15 +277,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -277,15 +277,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)BlkGroupSize; (void)BlkGroupSize;
(void)ws_buf2_bytes_offset; (void)ws_buf2_bytes_offset;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -278,15 +278,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -278,15 +278,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)BlkGroupSize; (void)BlkGroupSize;
(void)ws_buf2_bytes_offset; (void)ws_buf2_bytes_offset;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -279,16 +279,16 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -279,16 +279,16 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)p_dst_global; (void)p_dst_global;
(void)indices_global; (void)indices_global;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
void* ws_buf1_global = static_cast<char*>(ws_global) + 4096; void* ws_buf1_global = const_cast<char*>(static_cast<const char*>(p_src2dDesc) + 4096);
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -279,16 +279,16 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -279,16 +279,16 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)p_dst_global; (void)p_dst_global;
(void)indices_global; (void)indices_global;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
void* ws_buf1_global = static_cast<char*>(ws_global) + 4096; void* ws_buf1_global = const_cast<char*>(static_cast<const char*>(p_src2dDesc) + 4096);
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -290,15 +290,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -290,15 +290,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)BlkGroupSize; (void)BlkGroupSize;
(void)ws_buf2_bytes_offset; (void)ws_buf2_bytes_offset;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -291,15 +291,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -291,15 +291,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)BlkGroupSize; (void)BlkGroupSize;
(void)ws_buf2_bytes_offset; (void)ws_buf2_bytes_offset;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -291,15 +291,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -291,15 +291,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)BlkGroupSize; (void)BlkGroupSize;
(void)ws_buf2_bytes_offset; (void)ws_buf2_bytes_offset;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -292,15 +292,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen, ...@@ -292,15 +292,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)BlkGroupSize; (void)BlkGroupSize;
(void)ws_buf2_bytes_offset; (void)ws_buf2_bytes_offset;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -237,15 +237,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen, ...@@ -237,15 +237,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)p_src_global; (void)p_src_global;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
void* ws_buf1_global = static_cast<char*>(ws_global) + 4096; void* ws_buf1_global = const_cast<char*>(static_cast<const char*>(p_src2dDesc) + 4096);
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -251,15 +251,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen, ...@@ -251,15 +251,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)p_src_global; (void)p_src_global;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
void* ws_buf1_global = static_cast<char*>(ws_global) + 4096; void* ws_buf1_global = const_cast<char*>(static_cast<const char*>(p_src2dDesc) + 4096);
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
...@@ -252,15 +252,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen, ...@@ -252,15 +252,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen,
const void* __restrict__ p_src_global, const void* __restrict__ p_src_global,
float beta, float beta,
void* __restrict__ p_dst_global, void* __restrict__ p_dst_global,
void* __restrict__ ws_global, const void CONSTANT* ws_global,
long ws_buf2_bytes_offset, long ws_buf2_bytes_offset,
void* __restrict__ indices_global) void* __restrict__ indices_global)
{ {
(void)p_src_global; (void)p_src_global;
const void* p_src2dDesc = ws_global; const void* p_src2dDesc = cast_pointer_to_generic_address_space(ws_global);
const void* p_dst1dDesc = static_cast<char*>(ws_global) + 2048; const void* p_dst1dDesc = static_cast<const char*>(p_src2dDesc) + 2048;
void* ws_buf1_global = static_cast<char*>(ws_global) + 4096; void* ws_buf1_global = const_cast<char*>(static_cast<const char*>(p_src2dDesc) + 4096);
const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc); const auto src2dDesc = get_reduction_src2d_descriptor<src2d_need_padding>(p_src2dDesc);
const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc); const auto dst1dDesc = get_reduction_dst1d_descriptor<dst1d_need_padding>(p_dst1dDesc);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment