Commit 3644f0ec authored by Adam Osewski's avatar Adam Osewski
Browse files

Launch grid size which is min of occupancy vs tile count

parent 3d345953
......@@ -567,12 +567,11 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
EDataType,
HasMainKBlockLoop,
CGlobalMemoryDataOperation>;
return LaunchKernel(kernel, arg, stream_config);
return LaunchKernel(kernel, arg, dev_gemm_args, stream_config);
}
template <typename KernelFunction>
float LaunchKernel(const KernelFunction& kernel,
const Argument& arg,
int CalculateMaxOccupancyGridSize(const KernelFunction& kernel,
const StreamConfig& stream_config) const
{
// Calculate max number of workgroups that can simultaneously reside on the CU.
......@@ -592,13 +591,29 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
<< std::endl;
}
return cu_count * ck::math::min(num_blocks, CU_BLOCKS) * BLOCK_SUBSCRIPTION_FACTOR;
}
// Launches `kernel` via launch_and_time_kernel and returns that call's float result
// (presumably the measured kernel time, judging by the helper's name -- TODO confirm).
//
// kernel        - kernel function to launch; also used for the occupancy query.
// arg           - supplies grid_size_, p_workspace_ and K_BATCH for the launch.
// dev_gemm_args - opaque pointer handed to launch_and_time_kernel unchanged
//                 (presumably the device-side gemm descriptors -- verify against caller).
// stream_config - passed to both the occupancy query and launch_and_time_kernel.
template <typename KernelFunction>
float LaunchKernel(const KernelFunction& kernel,
const Argument& arg,
const void* dev_gemm_args,
const StreamConfig& stream_config) const
{
// Upper bound on the grid size as reported by the occupancy helper.
int max_occupancy_grid_size = CalculateMaxOccupancyGridSize(kernel, stream_config);
// We launch the smaller of two workgroup counts: the number of tiles actually needed
// and the number of workgroups that maximizes GPU occupancy. For some tile
// configurations the former is smaller than the latter. Launching too many workgroups
// means some of them have to iterate through all gemm problem descriptors just to
// find out they have nothing to do, which is a waste of GPU cycles.
return launch_and_time_kernel(
stream_config,
kernel,
// NOTE(review): this line and the next both look like the grid-dimension argument.
// This text appears to be a diff with its +/- markers stripped, so the line below is
// presumably the removed version (it uses names not declared in this function) --
// confirm against the real file.
dim3(cu_count * ck::math::min(num_blocks, CU_BLOCKS) * BLOCK_SUBSCRIPTION_FACTOR),
// Grid size: never more workgroups than arg.grid_size_, never more than the occupancy bound.
dim3(ck::math::min(arg.grid_size_, max_occupancy_grid_size)),
dim3(BlockSize),
0,
arg.p_workspace_,
dev_gemm_args,
arg.grid_size_,
arg.K_BATCH);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment