Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
2b895ac4
Commit
2b895ac4
authored
Dec 21, 2022
by
fsx950223
Browse files
rename
parent
a124067c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
25 additions
and
25 deletions
+25
-25
example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
..._sparse_embedding/sparse_embedding3_forward_layernorm.cpp
+10
-10
include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp
...evice/impl/device_sparse_embeddings_forward_layernorm.hpp
+9
-9
include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp
...gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp
+6
-6
No files found.
example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp
View file @
2b895ac4
...
...
@@ -25,16 +25,16 @@ using GammaDataType = ck::half_t;
using
BetaDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
OutType
=
ck
::
half_t
;
using
ElementwiseOperation
=
ck
::
tensor_operation
::
element_wise
::
AddAdd
;
using
Emb
ElementwiseOperation
=
ck
::
tensor_operation
::
element_wise
::
AddAdd
;
using
DeviceInstance_fp16_e256
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
256
,
1
,
1
,
3
>
;
using
DeviceInstance_fp16_e512
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
512
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e768
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
768
,
1
,
1
,
3
>
;
using
DeviceInstance_fp16_e1024
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
1024
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e1536
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
1536
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e2048
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
2048
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e4096
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
4096
,
1
,
8
,
3
>
;
using
DeviceInstance_fp16_e8192
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
ElementwiseOperation
,
256
,
1
,
256
,
1
,
8192
,
1
,
8
,
3
>
;
using
DeviceInstance_fp16_e256
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
256
,
1
,
1
,
3
>
;
using
DeviceInstance_fp16_e512
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
512
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e768
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
768
,
1
,
1
,
3
>
;
using
DeviceInstance_fp16_e1024
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
1024
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e1536
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
1536
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e2048
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
2048
,
1
,
2
,
3
>
;
using
DeviceInstance_fp16_e4096
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
4096
,
1
,
8
,
3
>
;
using
DeviceInstance_fp16_e8192
=
ck
::
tensor_operation
::
device
::
DeviceSparseEmbeddingsForwardLayernorm
<
EmbType
,
IndexType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
OutType
,
Emb
ElementwiseOperation
,
256
,
1
,
256
,
1
,
8192
,
1
,
8
,
3
>
;
template
<
typename
emb_type
,
ck
::
index_t
dim
>
struct
emb_kernel
{};
...
...
@@ -137,7 +137,7 @@ int main()
current_dim
,
index_length
,
epsilon
,
ElementwiseOperation
{});
Emb
ElementwiseOperation
{});
std
::
cout
<<
"Dim:"
<<
current_dim
<<
", kernel:"
<<
device_instance
.
GetTypeString
()
<<
std
::
endl
<<
std
::
flush
;
...
...
include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp
View file @
2b895ac4
...
...
@@ -24,7 +24,7 @@ template <typename EmbType,
typename
BetaDataType
,
typename
AccDataType
,
typename
OutType
,
typename
ElementwiseOperation
,
typename
Emb
ElementwiseOperation
,
ck
::
index_t
BlockSize
,
ck
::
index_t
DimClusterSize
,
ck
::
index_t
RowClusterSize
,
...
...
@@ -50,7 +50,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
const
ck
::
index_t
EmbeddingDim
,
const
ck
::
index_t
IndexLength
,
const
AccDataType
epsilon
,
const
ElementwiseOperation
elementwise_op
)
const
Emb
ElementwiseOperation
emb_
elementwise_op
)
:
p_out_
(
p_out
),
p_embs_
(
p_embs
),
p_indexs_
(
p_indexs
),
...
...
@@ -59,7 +59,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
EmbeddingDim_
(
EmbeddingDim
),
IndexLength_
(
IndexLength
),
epsilon_
(
epsilon
),
elementwise_op_
(
elementwise_op
)
emb_
elementwise_op_
(
emb_
elementwise_op
)
{
grid_size_
=
(
IndexLength
+
DimClusterSize
-
1
)
/
DimClusterSize
;
}
...
...
@@ -72,7 +72,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
ck
::
index_t
EmbeddingDim_
;
ck
::
index_t
IndexLength_
;
AccDataType
epsilon_
;
ElementwiseOperation
elementwise_op_
;
Emb
ElementwiseOperation
emb_
elementwise_op_
;
size_t
grid_size_
;
};
...
...
@@ -86,7 +86,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
ck
::
index_t
EmbeddingDim
,
ck
::
index_t
IndexLength
,
const
AccDataType
epsilon
,
const
ElementwiseOperation
elementwise_op
)
const
Emb
ElementwiseOperation
emb_
elementwise_op
)
{
return
std
::
make_unique
<
Argument
>
(
reinterpret_cast
<
OutType
*>
(
p_out
),
p_embs
,
...
...
@@ -96,7 +96,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
EmbeddingDim
,
IndexLength
,
epsilon
,
elementwise_op
);
emb_
elementwise_op
);
}
using
GridwiseSparseEmbedding
=
...
...
@@ -107,7 +107,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
AccDataType
,
OutType
,
decltype
(
MakeOutputDescriptor
(
1
,
1
)),
ElementwiseOperation
,
Emb
ElementwiseOperation
,
BlockSize
,
DimClusterSize
,
RowClusterSize
,
...
...
@@ -131,7 +131,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
AccDataType
,
OutType
,
decltype
(
out_desc
),
ElementwiseOperation
,
Emb
ElementwiseOperation
,
NumEmbeddings
>
;
float
avg_time
=
0
;
avg_time
+=
launch_and_time_kernel
(
stream_config
,
...
...
@@ -146,7 +146,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
arg
.
p_beta_
,
out_desc
,
arg
.
epsilon_
,
arg
.
elementwise_op_
);
arg
.
emb_
elementwise_op_
);
return
(
avg_time
);
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp
View file @
2b895ac4
...
...
@@ -18,7 +18,7 @@ template <typename GridwiseSparseEmbedding,
typename
AccDataType
,
typename
OutType
,
typename
OutGridDesc
,
typename
ElementwiseOperation
,
typename
Emb
ElementwiseOperation
,
ck
::
index_t
NumEmbeddings
>
#if CK_USE_LAUNCH_BOUNDS
__launch_bounds__
(
CK_MAX_THREAD_PER_BLOCK
,
CK_MIN_BLOCK_PER_CU
)
...
...
@@ -31,10 +31,10 @@ __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
const
BetaDataType
*
p_beta
,
const
OutGridDesc
out_grid_desc
,
const
AccDataType
epsilon
,
const
ElementwiseOperation
elementwise_op
)
const
Emb
ElementwiseOperation
emb_
elementwise_op
)
{
GridwiseSparseEmbedding
::
Run
(
p_out
,
p_embs
,
p_indexes
,
p_gamma
,
p_beta
,
out_grid_desc
,
epsilon
,
elementwise_op
);
p_out
,
p_embs
,
p_indexes
,
p_gamma
,
p_beta
,
out_grid_desc
,
epsilon
,
emb_
elementwise_op
);
}
template
<
typename
EmbType
,
...
...
@@ -44,7 +44,7 @@ template <typename EmbType,
typename
AccDataType
,
typename
OutType
,
typename
OutGridDesc
,
typename
ElementwiseOperation
,
typename
Emb
ElementwiseOperation
,
ck
::
index_t
BlockSize
,
ck
::
index_t
DimClusterSize
,
ck
::
index_t
RowClusterSize
,
...
...
@@ -96,7 +96,7 @@ struct GridwiseSparseEmbeddingsForwardLayernorm
const
BetaDataType
*
p_beta
,
const
OutGridDesc
,
const
AccDataType
epsilon
,
const
ElementwiseOperation
elementwise_op
)
const
Emb
ElementwiseOperation
emb_
elementwise_op
)
{
const
index_t
thread_local_id
=
get_thread_local_1d_id
();
const
index_t
block_global_id
=
get_block_1d_id
();
...
...
@@ -189,7 +189,7 @@ struct GridwiseSparseEmbeddingsForwardLayernorm
return
acc_thread_buf
(
Number
<
register_offset
>
{});
},
Number
<
1
>
{});
unpack2
(
elementwise_op
,
out_data_refs
,
in_data_refs
);
unpack2
(
emb_
elementwise_op
,
out_data_refs
,
in_data_refs
);
});
});
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment