Commit 5fcf30ba authored by wenjh
Browse files

Fix int8 gemm nt and wgrad


Signed-off-by: wenjh <wenjh@sugon.com>
parent 9fe13a33
...@@ -50,8 +50,8 @@ def get_full_tuning_space(): ...@@ -50,8 +50,8 @@ def get_full_tuning_space():
@triton.autotune( @triton.autotune(
configs= get_full_tuning_space() if tuning_full_space else [ configs= get_full_tuning_space() if tuning_full_space else [
# triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 2, 'kpack':2}, num_stages=2, num_warps=8), # triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 2, 'kpack':2}, num_stages=2, num_warps=8),
triton.Config({'BLOCK_SIZE_M': 16, 'BLOCK_SIZE_N': 64, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 2,}, num_stages=1, num_warps=4, enable_mmacfuse=2), # triton.Config({'BLOCK_SIZE_M': 16, 'BLOCK_SIZE_N': 64, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 2,}, num_stages=1, num_warps=4, enable_mmacfuse=2),
triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 8,}, num_stages=1, num_warps=4, enable_mmacfuse=2), triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': blockwise_fp8_block_len, 'BLOCK_SIZE_K': blockwise_fp8_block_len, 'GROUP_SIZE_M': 8,}, num_stages=1, num_warps=4, enable_mmacfuse=2),
], ],
key=['M', 'N', 'K'], key=['M', 'N', 'K'],
# reset_to_zero=['c_ptr'] # reset_to_zero=['c_ptr']
......
...@@ -50,8 +50,8 @@ def get_full_tuning_space(): ...@@ -50,8 +50,8 @@ def get_full_tuning_space():
@triton.autotune( @triton.autotune(
configs= get_full_tuning_space() if tuning_full_space else [ configs= get_full_tuning_space() if tuning_full_space else [
# triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 2, 'kpack':2}, num_stages=2, num_warps=8), # triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 2, 'kpack':2}, num_stages=2, num_warps=8),
triton.Config({'BLOCK_SIZE_M': 16, 'BLOCK_SIZE_N': 64, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 2,}, num_stages=1, num_warps=4, enable_mmacfuse=2), # triton.Config({'BLOCK_SIZE_M': 16, 'BLOCK_SIZE_N': 64, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 2,}, num_stages=1, num_warps=4, enable_mmacfuse=2),
triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 8,}, num_stages=1, num_warps=4, enable_mmacfuse=2), triton.Config({'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': blockwise_fp8_block_len, 'BLOCK_SIZE_K': blockwise_fp8_block_len, 'GROUP_SIZE_M': 8,}, num_stages=1, num_warps=4, enable_mmacfuse=2),
], ],
key=['M', 'N', 'K'], key=['M', 'N', 'K'],
# reset_to_zero=['c_ptr'] # reset_to_zero=['c_ptr']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment