Commit a634c224 authored by ltqin

Change parameters for k=80

parent 807ac476
@@ -25,7 +25,7 @@ Kernel outputs:
 #define PRINT_HOST 0
 #define USING_MASK 1
-#define USING_K128 0
+#define USING_K128 1

 #include <iostream>
 #include <numeric>
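For reference, USING_K128 acts as a compile-time switch in this example driver. The sketch below only illustrates the general pattern of such a macro selecting a padded head-dimension configuration; it is not the file's actual code, and the constant name kHeadDimPadded is hypothetical.

```cpp
// Illustrative sketch (assumption): a macro like USING_K128 picking the
// compile-time padded head dimension used by the kernel configuration.
#include <cstdio>

#define USING_K128 1

#if USING_K128
static constexpr int kHeadDimPadded = 128; // e.g. k=80 padded up to 128
#else
static constexpr int kHeadDimPadded = 64;
#endif

int main()
{
    std::printf("padded head dim = %d\n", kHeadDimPadded);
    return 0;
}
```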
@@ -119,9 +119,9 @@ using DeviceGemmInstance =
         256,
         128, // MPerBlock
         128, // NPerBlock
-        32,  // KPerBlock
+        64,  // KPerBlock
         128, // Gemm1NPerBlock
-        64,  // Gemm1KPerBlock
+        32,  // Gemm1KPerBlock
         8,   // AK1
         8,   // BK1
         2,   // B1K1
@@ -188,9 +188,9 @@ using DeviceGemmInstance =
         256,
         128, // MPerBlock
         128, // NPerBlock
-        32,  // KPerBlock
+        64,  // KPerBlock
         64,  // Gemm1NPerBlock
-        32,  // Gemm1KPerBlock
+        64,  // Gemm1KPerBlock
         8,   // AK1
         8,   // BK1
         2,   // B1K1
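The two hunks above raise KPerBlock from 32 to 64 in both DeviceGemmInstance configurations. A plausible reading, sketched below, is that the GEMM K dimension (the attention head dimension, k=80 per the commit message) is padded up to a multiple of KPerBlock, so a larger KPerBlock means fewer main-loop iterations; the RoundUp helper and the printed numbers are illustrative assumptions, not taken from the kernel.

```cpp
// Hypothetical sketch: how KPerBlock relates to the head dimension K.
// With K = 80, the GEMM K extent is assumed padded to a multiple of
// KPerBlock, and the main loop runs K_padded / KPerBlock iterations.
#include <cstdio>

constexpr int RoundUp(int x, int m) { return ((x + m - 1) / m) * m; }

int main()
{
    constexpr int K = 80; // head dimension from the commit message
    for (int k_per_block : {32, 64})
    {
        const int k_padded   = RoundUp(K, k_per_block); // 96 for 32, 128 for 64
        const int iterations = k_padded / k_per_block;  // 3 vs 2 main-loop steps
        std::printf("KPerBlock=%d -> K_padded=%d, loop iterations=%d\n",
                    k_per_block, k_padded, iterations);
    }
    return 0;
}
```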
@@ -1175,9 +1175,10 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
                             const float p_drop,
                             ck::philox& ph)
     {
-        const FloatGemmAcc p_dropout  = type_convert<FloatGemmAcc>(1.0f - p_drop);
-        const FloatGemmAcc rp_dropout = type_convert<FloatGemmAcc>(1.0f / p_dropout);
-        const ushort p_dropout_in_16bits = uint16_t(std::floor(p_dropout * 65535.0));
+        const FloatGemmAcc p_dropout  = type_convert<FloatGemmAcc>(1.0f - p_drop);
+        const FloatGemmAcc rp_dropout = type_convert<FloatGemmAcc>(1.0f / p_dropout);
+        const ushort p_dropout_in_16bits =
+            __builtin_amdgcn_readfirstlane(std::floor(p_dropout * 65535.0));
         const tensor_operation::element_wise::Scale scale_rp_dropout(s_element_op.Value() *
                                                                       rp_dropout);
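The last hunk replaces the plain uint16_t cast with __builtin_amdgcn_readfirstlane, an AMD GPU intrinsic that returns the value held by the first active lane of the wavefront, making the 16-bit dropout threshold wave-uniform. The host-only sketch below reproduces just the arithmetic of those lines; the example probability is made up, and the intrinsic itself is omitted because it is only available in device code.

```cpp
// Host-side sketch of the dropout-threshold math from the changed lines
// (CPU only; on the GPU the result would additionally be broadcast from
// lane 0 via __builtin_amdgcn_readfirstlane into a scalar value).
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    const float p_drop     = 0.1f;              // assumed drop probability
    const float p_dropout  = 1.0f - p_drop;     // probability of keeping an element
    const float rp_dropout = 1.0f / p_dropout;  // rescale factor for kept elements

    // Map the keep probability onto [0, 65535]; a 16-bit random draw r
    // keeps the element when r is at most this threshold.
    const uint16_t threshold =
        static_cast<uint16_t>(std::floor(p_dropout * 65535.0));

    std::printf("keep prob = %f, rescale = %f, 16-bit threshold = %u\n",
                p_dropout, rp_dropout, threshold);
    return 0;
}
```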