"git@developer.sourcefind.cn:OpenDAS/fairscale.git" did not exist on "7d7edf6d37576fb6eda65db6db43fda54a7f06ba"
Commit 89cccd81 authored by Tim Dettmers's avatar Tim Dettmers
Browse files

A tile multi-tiling.

parent 4decb3cc
...@@ -3061,10 +3061,10 @@ template <typename T, int BITS, int THREADS> __global__ void gemm_device(int M, ...@@ -3061,10 +3061,10 @@ template <typename T, int BITS, int THREADS> __global__ void gemm_device(int M,
T local_A[1]; T local_A[1];
T local_B[32]; T local_B[32];
const int a_tile_offset = (8*16 + 16); const int a_tile_offset = (16 + 16);
const int b_tile_offset = (16*32 + 16); const int b_tile_offset = (16*32 + 16);
__shared__ T smem_A[2*batch_size_warps*8*16 + (2*16*(batch_size_warps-1))]; __shared__ T smem_A[8*16 + (4*16*(batch_size_warps-1))];
__shared__ T smem_B[2*batch_size_warps*16*32 + (2*16*(batch_size_warps-1))]; __shared__ T smem_B[2*batch_size_warps*16*32 + (2*16*(batch_size_warps-1))];
__shared__ T smem_C[8*32]; __shared__ T smem_C[8*32];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment