Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
514cee8a
"tools/git@developer.sourcefind.cn:modelzoo/paddleocr.git" did not exist on "3f11da7d15c75f8f22b385e77e4ff5096fdec4cb"
Commit
514cee8a
authored
Aug 10, 2023
by
letaoqin
Browse files
bias parameters update to other example
parent
d84b00c7
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
48 additions
and
36 deletions
+48
-36
example/32_batched_gemm_scale_softmax_gemm/batched_multihead_attention_forward_v2.cpp
...e_softmax_gemm/batched_multihead_attention_forward_v2.cpp
+9
-3
example/32_batched_gemm_scale_softmax_gemm/batched_multihead_attention_train_v2.cpp
...ale_softmax_gemm/batched_multihead_attention_train_v2.cpp
+15
-15
example/32_batched_gemm_scale_softmax_gemm/grouped_multihead_attention_forward_v2.cpp
...e_softmax_gemm/grouped_multihead_attention_forward_v2.cpp
+9
-3
example/32_batched_gemm_scale_softmax_gemm/grouped_multihead_attention_train_v2.cpp
...ale_softmax_gemm/grouped_multihead_attention_train_v2.cpp
+15
-15
No files found.
example/32_batched_gemm_scale_softmax_gemm/batched_multihead_attention_forward_v2.cpp
View file @
514cee8a
...
@@ -135,6 +135,7 @@ using DeviceGemmInstance =
...
@@ -135,6 +135,7 @@ using DeviceGemmInstance =
8
,
8
,
8
,
8
,
true
,
true
,
4
,
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
...
@@ -146,7 +147,8 @@ using DeviceGemmInstance =
...
@@ -146,7 +147,8 @@ using DeviceGemmInstance =
1
,
// CShuffleNXdlPerWavePerShuffle
1
,
// CShuffleNXdlPerWavePerShuffle
S
<
1
,
64
,
1
,
4
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
S
<
1
,
64
,
1
,
4
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
MaskingSpec
,
// MaskingSpecialization
4
,
MaskingSpec
,
// MaskingSpecialization
Deterministic
>
;
Deterministic
>
;
#elif(DIM <= 64)
#elif(DIM <= 64)
using
DeviceGemmInstance
=
using
DeviceGemmInstance
=
...
@@ -206,6 +208,7 @@ using DeviceGemmInstance =
...
@@ -206,6 +208,7 @@ using DeviceGemmInstance =
8
,
8
,
8
,
8
,
true
,
true
,
4
,
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
...
@@ -217,7 +220,8 @@ using DeviceGemmInstance =
...
@@ -217,7 +220,8 @@ using DeviceGemmInstance =
2
,
// CShuffleNXdlPerWavePerShuffle
2
,
// CShuffleNXdlPerWavePerShuffle
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
MaskingSpec
,
// MaskingSpecialization
4
,
MaskingSpec
,
// MaskingSpecialization
Deterministic
>
;
Deterministic
>
;
#elif(DIM <= 128)
#elif(DIM <= 128)
using
DeviceGemmInstance
=
using
DeviceGemmInstance
=
...
@@ -277,6 +281,7 @@ using DeviceGemmInstance =
...
@@ -277,6 +281,7 @@ using DeviceGemmInstance =
8
,
8
,
8
,
8
,
true
,
true
,
4
,
S
<
8
,
32
,
1
>
,
// B1BlockTransfer
S
<
8
,
32
,
1
>
,
// B1BlockTransfer
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
...
@@ -288,7 +293,8 @@ using DeviceGemmInstance =
...
@@ -288,7 +293,8 @@ using DeviceGemmInstance =
2
,
// CShuffleNXdlPerWavePerShuffle
2
,
// CShuffleNXdlPerWavePerShuffle
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
MaskingSpec
,
// MaskingSpecialization
4
,
MaskingSpec
,
// MaskingSpecialization
Deterministic
>
;
Deterministic
>
;
#endif
#endif
...
...
example/32_batched_gemm_scale_softmax_gemm/batched_multihead_attention_train_v2.cpp
View file @
514cee8a
This diff is collapsed.
Click to expand it.
example/32_batched_gemm_scale_softmax_gemm/grouped_multihead_attention_forward_v2.cpp
View file @
514cee8a
...
@@ -135,6 +135,7 @@ using DeviceGemmInstance =
...
@@ -135,6 +135,7 @@ using DeviceGemmInstance =
8
,
8
,
8
,
8
,
true
,
true
,
1
,
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
...
@@ -146,7 +147,8 @@ using DeviceGemmInstance =
...
@@ -146,7 +147,8 @@ using DeviceGemmInstance =
1
,
// CShuffleNXdlPerWavePerShuffle
1
,
// CShuffleNXdlPerWavePerShuffle
S
<
1
,
64
,
1
,
4
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
S
<
1
,
64
,
1
,
4
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
MaskingSpec
,
// MaskingSpecialization
1
,
MaskingSpec
,
// MaskingSpecialization
Deterministic
>
;
Deterministic
>
;
#elif(DIM <= 64)
#elif(DIM <= 64)
using
DeviceGemmInstance
=
using
DeviceGemmInstance
=
...
@@ -206,6 +208,7 @@ using DeviceGemmInstance =
...
@@ -206,6 +208,7 @@ using DeviceGemmInstance =
8
,
8
,
8
,
8
,
true
,
true
,
1
,
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
16
,
16
,
1
>
,
// B1BlockTransfer
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
...
@@ -217,7 +220,8 @@ using DeviceGemmInstance =
...
@@ -217,7 +220,8 @@ using DeviceGemmInstance =
2
,
// CShuffleNXdlPerWavePerShuffle
2
,
// CShuffleNXdlPerWavePerShuffle
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
MaskingSpec
,
// MaskingSpecialization
1
,
MaskingSpec
,
// MaskingSpecialization
Deterministic
>
;
Deterministic
>
;
#elif(DIM <= 128)
#elif(DIM <= 128)
using
DeviceGemmInstance
=
using
DeviceGemmInstance
=
...
@@ -277,6 +281,7 @@ using DeviceGemmInstance =
...
@@ -277,6 +281,7 @@ using DeviceGemmInstance =
8
,
8
,
8
,
8
,
true
,
true
,
1
,
S
<
8
,
32
,
1
>
,
// B1BlockTransfer
S
<
8
,
32
,
1
>
,
// B1BlockTransfer
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
...
@@ -288,7 +293,8 @@ using DeviceGemmInstance =
...
@@ -288,7 +293,8 @@ using DeviceGemmInstance =
2
,
// CShuffleNXdlPerWavePerShuffle
2
,
// CShuffleNXdlPerWavePerShuffle
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
S
<
1
,
32
,
1
,
8
>
,
// CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
8
,
// CShuffleBlockTransferScalarPerVector_NPerBlock
MaskingSpec
,
// MaskingSpecialization
1
,
MaskingSpec
,
// MaskingSpecialization
Deterministic
>
;
Deterministic
>
;
#endif
#endif
...
...
example/32_batched_gemm_scale_softmax_gemm/grouped_multihead_attention_train_v2.cpp
View file @
514cee8a
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment