gaoqiong / composable_kernel · Commits

Commit 346b4e8d, authored Jan 30, 2023 by ltqin

    change parameter:remove p_dropout_in_16bits
parent c2d566ff
Showing 3 changed files with 6 additions and 14 deletions.
example/32_batched_gemm_scale_softmax_gemm/batched_multihead_attention_backward_fp16_dropout.cpp (+2, -3)
include/ck/tensor_operation/gpu/device/impl/device_batched_multihead_attention_backward_train_xdl_cshuffle.hpp (+2, -9)
include/ck/tensor_operation/gpu/grid/gridwise_batched_multihead_attention_backward_xdl_cshuffle_v2.hpp (+2, -2)
example/32_batched_gemm_scale_softmax_gemm/batched_multihead_attention_backward_fp16_dropout.cpp
@@ -270,8 +270,8 @@ int run(int argc, char* argv[])
     ck::index_t N  = 512;
     ck::index_t K  = 128;
     ck::index_t O  = 128;
-    ck::index_t G0 = 1;
-    ck::index_t G1 = 1;
+    ck::index_t G0 = 3;
+    ck::index_t G1 = 2;
     float alpha = 1.f / std::sqrt(K);
...
@@ -285,7 +285,6 @@ int run(int argc, char* argv[])
     const unsigned long long seed   = 1;
     const unsigned long long offset = 0;
...
     if(argc == 1)
     {
         // use default case
...
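The example change is pure configuration: the new defaults run G0 * G1 = 3 * 2 = 6 independent attention problems instead of one, so the batched code path is actually exercised, while the softmax scale stays tied to the head dimension K. A minimal sketch of that scale, assuming nothing beyond the alpha line shown above:

#include <cmath>
#include <cstdio>

int main()
{
    const int K       = 128;                // head dimension, as in the example
    const float alpha = 1.f / std::sqrt(K); // scaled-dot-product softmax factor
    std::printf("alpha = %f\n", alpha);     // ~0.088388, i.e. 1/sqrt(128)
    return 0;
}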
include/ck/tensor_operation/gpu/device/impl/device_batched_multihead_attention_backward_train_xdl_cshuffle.hpp
@@ -82,7 +82,6 @@ __global__ void
            const index_t batch_count,
            const ComputeBasePtrOfStridedBatch compute_base_ptr_of_batch,
            const C0MatrixMask c0_matrix_mask,
-           const ushort p_dropout_in_16bits,
            const float p_dropout,
            const unsigned long long seed,
            const unsigned long long offset)
...
@@ -137,7 +136,6 @@ __global__ void
                ygrad_grid_desc_m0_o_m1,
                block_2_ctile_map,
                c0_matrix_mask,
-               p_dropout_in_16bits,
                p_dropout,
                ph);
 #else
...
@@ -778,10 +776,8 @@ struct DeviceBatchedMultiheadAttentionBackward_Train_Xdl_CShuffle
                y_grid_desc_m_o_);
        }
        p_dropout_ = 1.f - p_drop;
-       p_dropout_in_16bits_ = uint16_t(std::floor(p_dropout_ * 65535.0));
-       rp_dropout_ = 1.f / p_dropout_;
+       float rp_dropout_ = 1.f / p_dropout_;
        acc_element_op_.Append(rp_dropout_);
        seed_ = std::get<0>(seeds);
...
@@ -875,8 +871,6 @@ struct DeviceBatchedMultiheadAttentionBackward_Train_Xdl_CShuffle
        ComputeBasePtrOfStridedBatch compute_base_ptr_of_batch_;
        float p_dropout_;
-       ushort p_dropout_in_16bits_;
-       GemmAccDataType rp_dropout_;
        unsigned long long seed_;
        unsigned long long offset_;
    };
...
@@ -958,7 +952,6 @@ struct DeviceBatchedMultiheadAttentionBackward_Train_Xdl_CShuffle
            arg.batch_count_,
            arg.compute_base_ptr_of_batch_,
            arg.c0_matrix_mask_,
-           arg.p_dropout_in_16bits_,
            arg.p_dropout_,
            arg.seed_,
            arg.offset_);
...
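On the device layer the commit shrinks the argument surface: the kernel-argument struct keeps only the keep-probability p_dropout_, seed_, and offset_, while the reciprocal scale becomes a constructor local that is folded into acc_element_op_. A standalone sketch of that shape; the struct and MakeDropoutArgs are illustrative names rather than CK API, and taking offset_ from get<1>(seeds) is an assumption, since the diff only shows the seed_ line:

#include <tuple>

// Illustrative stand-in for the Argument struct's dropout fields after this
// commit; field names mirror the diff, the struct itself is hypothetical.
struct DropoutArgsSketch
{
    float p_dropout_; // keep-probability, 1.f - p_drop
    unsigned long long seed_;
    unsigned long long offset_;
};

DropoutArgsSketch MakeDropoutArgs(float p_drop,
                                  std::tuple<unsigned long long, unsigned long long> seeds)
{
    DropoutArgsSketch arg{};
    arg.p_dropout_ = 1.f - p_drop;
    // The reciprocal is consumed once, to parameterize the accumulator
    // element-op, so it can live on the stack instead of in the struct:
    const float rp_dropout = 1.f / arg.p_dropout_;
    (void)rp_dropout; // would feed acc_element_op_.Append(rp_dropout)
    arg.seed_   = std::get<0>(seeds);
    arg.offset_ = std::get<1>(seeds); // assumption: offset taken from get<1>
    return arg;
}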
include/ck/tensor_operation/gpu/grid/gridwise_batched_multihead_attention_backward_xdl_cshuffle_v2.hpp
@@ -1169,11 +1169,11 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
        const YGradGridDesc_M0_O_M1& ygrad_grid_desc_m0_o_m1,
        const Block2CTileMap& block_2_ctile_map,
        const C0MatrixMask& c0_matrix_mask,
-       const ushort p_dropout_in_16bits,
        FloatGemmAcc p_dropout,
        ck::philox& ph)
    {
-       const FloatGemmAcc rp_dropout = 1.0f / p_dropout;
+       const ushort p_dropout_in_16bits = uint16_t(std::floor(p_dropout * 65535.0));
+       const FloatGemmAcc rp_dropout = 1.0f / p_dropout;
        const auto q_grid_buf = make_dynamic_buffer<AddressSpaceEnum::Global>(
            p_q_grid, q_grid_desc_k0_m_k1.GetElementSpaceSize());
...
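With the parameter removed, the gridwise kernel derives the 16-bit threshold from p_dropout at the top of its body (the two added lines above). The underlying technique is ordinary inverted dropout in 16-bit fixed point: keep an element when a uniform 16-bit draw falls at or below floor(p * 65535), and scale survivors by 1/p so the expected value is unchanged. A self-contained sketch; DropoutScale and rand16 are hypothetical names, and rand16 merely stands in for one 16-bit lane of the Philox output rather than CK's ck::philox API:

#include <cmath>
#include <cstdint>

// Minimal sketch of the fixed-point dropout test the kernel can now set up
// on its own. `rand16` is a placeholder for a 16-bit uniform random draw.
inline float DropoutScale(uint16_t rand16, float p_dropout /* keep-probability */)
{
    // Quantize the keep-probability onto the uint16_t range (done once,
    // mirroring the added line in the diff above)...
    const uint16_t p_dropout_in_16bits = uint16_t(std::floor(p_dropout * 65535.0));
    const float rp_dropout             = 1.0f / p_dropout;
    // ...then keep an element iff its draw lands at or below the threshold,
    // rescaling survivors by 1/p so the expectation is preserved.
    return rand16 <= p_dropout_in_16bits ? rp_dropout : 0.0f;
}

Over many draws the dropped fraction approaches 1 - p_dropout, and the 1/p rescaling keeps E[DropoutScale(rand16, p) * x] ≈ x, which is why rp_dropout is also what the device layer appends to the accumulator element-op.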