add dim=32 64

6fa4feac · letaoqin · 9e49c2bf · 6fa4feac · 6fa4feac
Commit 6fa4feac authored Aug 03, 2023 by letaoqin
2 changed files
--- a/example/52_flash_atten_bias/batched_multihead_attention_bias_forward_v2.cpp
+++ b/example/52_flash_atten_bias/batched_multihead_attention_bias_forward_v2.cpp
@@ -9,7 +9,7 @@ Gemm + Softmax + Gemm fused operation. Computes C_g_m_o = Softmax(A_g_m_k * B0_g
                                                                          Gemm1
 */
-#define DIM 128 // DIM should be a multiple of 8.
+#define DIM 64 // DIM should be a multiple of 8.
 #include <iostream>
 #include <numeric>
@@ -80,7 +80,7 @@ static constexpr bool Deterministic = false;
 #if(DIM <= 32)
 using DeviceGemmInstance =
-    ck::tensor_operation::device::DeviceBatchedMultiheadAttentionForward_Xdl_CShuffle_V2<
+    ck::tensor_operation::device::DeviceBatchedMultiheadAttentionBiasForward_Xdl_CShuffle_V2<
        NumDimG,
        NumDimM,
        NumDimN,
@@ -151,7 +151,7 @@ using DeviceGemmInstance =
        Deterministic>;
 #elif(DIM <= 64)
 using DeviceGemmInstance =
-    ck::tensor_operation::device::DeviceBatchedMultiheadAttentionForward_Xdl_CShuffle_V2<
+    ck::tensor_operation::device::DeviceBatchedMultiheadAttentionBiasForward_Xdl_CShuffle_V2<
        NumDimG,
        NumDimM,
        NumDimN,

--- a/example/52_flash_atten_bias/run_batched_multihead_attention_bias_forward.inc
+++ b/example/52_flash_atten_bias/run_batched_multihead_attention_bias_forward.inc
@@ -23,7 +23,7 @@ int run(int argc, char* argv[])
    bool input_permute  = false;
    bool output_permute = true;
-    float p_drop                    = 0;
+    float p_drop                    = 0.1;
    const unsigned long long seed   = 1;
    const unsigned long long offset = 0;