Commit b398481e authored by Adam Osewski
Browse files

Fix synchronization scheme.

parent 91343ec1
...@@ -116,7 +116,7 @@ class StridedReductionTileLoop ...@@ -116,7 +116,7 @@ class StridedReductionTileLoop
index_t neighbour_count = 0; index_t neighbour_count = 0;
if(tiles_per_block_ < k_tiles) if(tiles_per_block_ < k_tiles)
{ {
// Since we can have deviation (+1) in neighbours number // Since we can have deviation (+/-1) in neighbours number
// we calculate how many workgroups are needed to process the k-tiles left. // we calculate how many workgroups are needed to process the k-tiles left.
neighbour_count = (k_tiles - k_tile_idx - 1 + tiles_per_block_ - 1) / tiles_per_block_; neighbour_count = (k_tiles - k_tile_idx - 1 + tiles_per_block_ - 1) / tiles_per_block_;
} }
...@@ -139,7 +139,9 @@ class StridedReductionTileLoop ...@@ -139,7 +139,9 @@ class StridedReductionTileLoop
if(neighbour_count > 0) if(neighbour_count > 0)
{ {
finished_block_flags_.wait_lt( // Also count this workgroup
neighbour_count++;
finished_block_flags_.wait_eq(
GetWorkgroupFlagIdx(k_tiles, output_tile_idx, output_tile_idx_offset), GetWorkgroupFlagIdx(k_tiles, output_tile_idx, output_tile_idx_offset),
neighbour_count); neighbour_count);
} }
......
...@@ -156,7 +156,7 @@ __global__ void grouped_gemm_naive_strided_tile_loop_reduce(const GemmArgDesc* p ...@@ -156,7 +156,7 @@ __global__ void grouped_gemm_naive_strided_tile_loop_reduce(const GemmArgDesc* p
// Accumulate partial results. We can have different # of workgroups to reduce, thus we // Accumulate partial results. We can have different # of workgroups to reduce, thus we
// read actual flag value. // read actual flag value.
for(index_t i = 1; i <= neighbour_count; ++i) for(index_t i = 1; i < neighbour_count; ++i)
{ {
partial_result += p_workspace[(get_block_1d_id()) * MPerBlock * NPerBlock + partial_result += p_workspace[(get_block_1d_id()) * MPerBlock * NPerBlock +
i * MPerBlock * NPerBlock + get_thread_local_1d_id()]; i * MPerBlock * NPerBlock + get_thread_local_1d_id()];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment