"vscode:/vscode.git/clone" did not exist on "94220a5817547fbb57208647550a0e0afb6431cb"
Commit 9ca5a6fc authored by Paul
Browse files

Use block size divisible by 32 and don't make global divisible by block size

parent 8045f7c8
...@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over) ...@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t groups = (n + local - 1) / local; std::size_t groups = (n + local - 1) / local;
std::size_t max_blocks = max_global / local; std::size_t max_blocks = max_global / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local; std::size_t nglobal = std::min(max_blocks * over, groups) * local;
return nglobal; return std::min(nglobal, n);
}; };
} }
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size) std::size_t compute_block_size(std::size_t n, std::size_t max_block_size)
{ {
size_t block_size = 128; const std::size_t min_block_size = 64;
while(block_size <= max_block_size and block_size <= n) const std::size_t base_block_size = 32;
block_size *= 2; auto block_size = (((n - 1) / base_block_size + 1)) * base_block_size;
return block_size / 2; return std::min(std::max(min_block_size, block_size), max_block_size);
} }
operation compile_hip_code_object(const std::string& content, hip_compile_options options) operation compile_hip_code_object(const std::string& content, hip_compile_options options)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment