Commit 3e2c63a7 authored by zhuwenwen
Browse files

Update the configuration of Triton FA (flash attention)

parent f39feef5
...@@ -311,7 +311,7 @@ def _attn_fwd_inner( ...@@ -311,7 +311,7 @@ def _attn_fwd_inner(
triton.Config({'BLOCK_M': 32, 'BLOCK_N': 32, 'waves_per_eu': 0, 'PRE_LOAD_V': False}, num_stages=1, num_warps=8), triton.Config({'BLOCK_M': 32, 'BLOCK_N': 32, 'waves_per_eu': 0, 'PRE_LOAD_V': False}, num_stages=1, num_warps=8),
# TODO: This config fails with head_size not pow2 with data mismatches. Check why. # TODO: This config fails with head_size not pow2 with data mismatches. Check why.
# triton.Config({'BLOCK_M': 32, 'BLOCK_N': 16, 'waves_per_eu': 1, 'PRE_LOAD_V': False}, num_stages=1, num_warps=4), # triton.Config({'BLOCK_M': 32, 'BLOCK_N': 16, 'waves_per_eu': 1, 'PRE_LOAD_V': False}, num_stages=1, num_warps=4),
triton.Config({'BLOCK_M': 16, 'BLOCK_N': 16, 'waves_per_eu': 0, 'PRE_LOAD_V': False}, num_stages=1, num_warps=4), # triton.Config({'BLOCK_M': 16, 'BLOCK_N': 16, 'waves_per_eu': 0, 'PRE_LOAD_V': False}, num_stages=1, num_warps=4),
], ],
key=['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL'], key=['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL'],
# use_cuda_graph=True, # use_cuda_graph=True,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment