Commit b8e153a4 authored by aska-0096
Browse files

a fix

parent 44be6438
...@@ -87,8 +87,8 @@ using DeviceOpInstance = ...@@ -87,8 +87,8 @@ using DeviceOpInstance =
8, 8,
16, 16,
16, 16,
8, 4,
1, 2,
S<4, 64, 1>, S<4, 64, 1>,
S<1, 0, 2>, S<1, 0, 2>,
S<1, 0, 2>, S<1, 0, 2>,
...@@ -105,7 +105,7 @@ using DeviceOpInstance = ...@@ -105,7 +105,7 @@ using DeviceOpInstance =
true, true,
1, 1,
1, 1,
S<1, 16, 1, 16>, S<1, 32, 1, 8>,
8>; 8>;
int main(int argc, char* argv[]) int main(int argc, char* argv[])
......
...@@ -151,7 +151,7 @@ __global__ void ...@@ -151,7 +151,7 @@ __global__ void
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \
defined(__gfx1102__)) defined(__gfx1102__))
// printf("entry kernel launch"); // printf("entry kernel launch");
__shared__ char p_shared[GridwiseOp::GetSharedMemoryNumberOfByte()]; __shared__ char p_shared[GridwiseOp::SharedMemTrait::lds_size];
const index_t num_blocks_per_batch = const index_t num_blocks_per_batch =
__builtin_amdgcn_readfirstlane(get_grid_size() / batch_count); __builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
...@@ -241,7 +241,7 @@ __global__ void ...@@ -241,7 +241,7 @@ __global__ void
{ {
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \
defined(__gfx1102__)) defined(__gfx1102__))
__shared__ char p_shared[GridwiseOp::GetSharedMemoryNumberOfByte()]; __shared__ char p_shared[GridwiseOp::SharedMemTrait::lds_size];
GridwiseOp::template Run<HasMainKBlockLoop>(p_a_grid, GridwiseOp::template Run<HasMainKBlockLoop>(p_a_grid,
p_b_grid, p_b_grid,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment