".github/vscode:/vscode.git/clone" did not exist on "4f1c989ffbbfb14e1244dcdae975ab3b2f6aa966"
Commit 2b895ac4 authored by fsx950223
Browse files

rename

parent a124067c
...@@ -25,16 +25,16 @@ using GammaDataType = ck::half_t; ...@@ -25,16 +25,16 @@ using GammaDataType = ck::half_t;
using BetaDataType = ck::half_t; using BetaDataType = ck::half_t;
using AccDataType = float; using AccDataType = float;
using OutType = ck::half_t; using OutType = ck::half_t;
using ElementwiseOperation = ck::tensor_operation::element_wise::AddAdd; using EmbElementwiseOperation = ck::tensor_operation::element_wise::AddAdd;
using DeviceInstance_fp16_e256 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 256, 1, 1, 3>; using DeviceInstance_fp16_e256 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 256, 1, 1, 3>;
using DeviceInstance_fp16_e512 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 512, 1, 2, 3>; using DeviceInstance_fp16_e512 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 512, 1, 2, 3>;
using DeviceInstance_fp16_e768 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 768, 1, 1, 3>; using DeviceInstance_fp16_e768 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 768, 1, 1, 3>;
using DeviceInstance_fp16_e1024 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 1024, 1, 2, 3>; using DeviceInstance_fp16_e1024 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 1024, 1, 2, 3>;
using DeviceInstance_fp16_e1536 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 1536, 1, 2, 3>; using DeviceInstance_fp16_e1536 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 1536, 1, 2, 3>;
using DeviceInstance_fp16_e2048 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 2048, 1, 2, 3>; using DeviceInstance_fp16_e2048 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 2048, 1, 2, 3>;
using DeviceInstance_fp16_e4096 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 4096, 1, 8, 3>; using DeviceInstance_fp16_e4096 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 4096, 1, 8, 3>;
using DeviceInstance_fp16_e8192 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, ElementwiseOperation, 256, 1, 256, 1, 8192, 1, 8, 3>; using DeviceInstance_fp16_e8192 = ck::tensor_operation::device::DeviceSparseEmbeddingsForwardLayernorm<EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, EmbElementwiseOperation, 256, 1, 256, 1, 8192, 1, 8, 3>;
template<typename emb_type, ck::index_t dim> struct emb_kernel{}; template<typename emb_type, ck::index_t dim> struct emb_kernel{};
...@@ -137,7 +137,7 @@ int main() ...@@ -137,7 +137,7 @@ int main()
current_dim, current_dim,
index_length, index_length,
epsilon, epsilon,
ElementwiseOperation{}); EmbElementwiseOperation{});
std::cout << "Dim:" << current_dim << ", kernel:" << device_instance.GetTypeString() std::cout << "Dim:" << current_dim << ", kernel:" << device_instance.GetTypeString()
<< std::endl << std::endl
<< std::flush; << std::flush;
......
...@@ -24,7 +24,7 @@ template <typename EmbType, ...@@ -24,7 +24,7 @@ template <typename EmbType,
typename BetaDataType, typename BetaDataType,
typename AccDataType, typename AccDataType,
typename OutType, typename OutType,
typename ElementwiseOperation, typename EmbElementwiseOperation,
ck::index_t BlockSize, ck::index_t BlockSize,
ck::index_t DimClusterSize, ck::index_t DimClusterSize,
ck::index_t RowClusterSize, ck::index_t RowClusterSize,
...@@ -50,7 +50,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -50,7 +50,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
const ck::index_t EmbeddingDim, const ck::index_t EmbeddingDim,
const ck::index_t IndexLength, const ck::index_t IndexLength,
const AccDataType epsilon, const AccDataType epsilon,
const ElementwiseOperation elementwise_op) const EmbElementwiseOperation emb_elementwise_op)
: p_out_(p_out), : p_out_(p_out),
p_embs_(p_embs), p_embs_(p_embs),
p_indexs_(p_indexs), p_indexs_(p_indexs),
...@@ -59,7 +59,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -59,7 +59,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
EmbeddingDim_(EmbeddingDim), EmbeddingDim_(EmbeddingDim),
IndexLength_(IndexLength), IndexLength_(IndexLength),
epsilon_(epsilon), epsilon_(epsilon),
elementwise_op_(elementwise_op) emb_elementwise_op_(emb_elementwise_op)
{ {
grid_size_ = (IndexLength + DimClusterSize - 1) / DimClusterSize; grid_size_ = (IndexLength + DimClusterSize - 1) / DimClusterSize;
} }
...@@ -72,7 +72,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -72,7 +72,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
ck::index_t EmbeddingDim_; ck::index_t EmbeddingDim_;
ck::index_t IndexLength_; ck::index_t IndexLength_;
AccDataType epsilon_; AccDataType epsilon_;
ElementwiseOperation elementwise_op_; EmbElementwiseOperation emb_elementwise_op_;
size_t grid_size_; size_t grid_size_;
}; };
...@@ -86,7 +86,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -86,7 +86,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
ck::index_t EmbeddingDim, ck::index_t EmbeddingDim,
ck::index_t IndexLength, ck::index_t IndexLength,
const AccDataType epsilon, const AccDataType epsilon,
const ElementwiseOperation elementwise_op) const EmbElementwiseOperation emb_elementwise_op)
{ {
return std::make_unique<Argument>(reinterpret_cast<OutType*>(p_out), return std::make_unique<Argument>(reinterpret_cast<OutType*>(p_out),
p_embs, p_embs,
...@@ -96,7 +96,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -96,7 +96,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
EmbeddingDim, EmbeddingDim,
IndexLength, IndexLength,
epsilon, epsilon,
elementwise_op); emb_elementwise_op);
} }
using GridwiseSparseEmbedding = using GridwiseSparseEmbedding =
...@@ -107,7 +107,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -107,7 +107,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
AccDataType, AccDataType,
OutType, OutType,
decltype(MakeOutputDescriptor(1, 1)), decltype(MakeOutputDescriptor(1, 1)),
ElementwiseOperation, EmbElementwiseOperation,
BlockSize, BlockSize,
DimClusterSize, DimClusterSize,
RowClusterSize, RowClusterSize,
...@@ -131,7 +131,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -131,7 +131,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
AccDataType, AccDataType,
OutType, OutType,
decltype(out_desc), decltype(out_desc),
ElementwiseOperation, EmbElementwiseOperation,
NumEmbeddings>; NumEmbeddings>;
float avg_time = 0; float avg_time = 0;
avg_time += launch_and_time_kernel(stream_config, avg_time += launch_and_time_kernel(stream_config,
...@@ -146,7 +146,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator ...@@ -146,7 +146,7 @@ struct DeviceSparseEmbeddingsForwardLayernorm : public BaseOperator
arg.p_beta_, arg.p_beta_,
out_desc, out_desc,
arg.epsilon_, arg.epsilon_,
arg.elementwise_op_); arg.emb_elementwise_op_);
return (avg_time); return (avg_time);
} }
......
...@@ -18,7 +18,7 @@ template <typename GridwiseSparseEmbedding, ...@@ -18,7 +18,7 @@ template <typename GridwiseSparseEmbedding,
typename AccDataType, typename AccDataType,
typename OutType, typename OutType,
typename OutGridDesc, typename OutGridDesc,
typename ElementwiseOperation, typename EmbElementwiseOperation,
ck::index_t NumEmbeddings> ck::index_t NumEmbeddings>
#if CK_USE_LAUNCH_BOUNDS #if CK_USE_LAUNCH_BOUNDS
__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
...@@ -31,10 +31,10 @@ __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) ...@@ -31,10 +31,10 @@ __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
const BetaDataType* p_beta, const BetaDataType* p_beta,
const OutGridDesc out_grid_desc, const OutGridDesc out_grid_desc,
const AccDataType epsilon, const AccDataType epsilon,
const ElementwiseOperation elementwise_op) const EmbElementwiseOperation emb_elementwise_op)
{ {
GridwiseSparseEmbedding::Run( GridwiseSparseEmbedding::Run(
p_out, p_embs, p_indexes, p_gamma, p_beta, out_grid_desc, epsilon, elementwise_op); p_out, p_embs, p_indexes, p_gamma, p_beta, out_grid_desc, epsilon, emb_elementwise_op);
} }
template <typename EmbType, template <typename EmbType,
...@@ -44,7 +44,7 @@ template <typename EmbType, ...@@ -44,7 +44,7 @@ template <typename EmbType,
typename AccDataType, typename AccDataType,
typename OutType, typename OutType,
typename OutGridDesc, typename OutGridDesc,
typename ElementwiseOperation, typename EmbElementwiseOperation,
ck::index_t BlockSize, ck::index_t BlockSize,
ck::index_t DimClusterSize, ck::index_t DimClusterSize,
ck::index_t RowClusterSize, ck::index_t RowClusterSize,
...@@ -96,7 +96,7 @@ struct GridwiseSparseEmbeddingsForwardLayernorm ...@@ -96,7 +96,7 @@ struct GridwiseSparseEmbeddingsForwardLayernorm
const BetaDataType* p_beta, const BetaDataType* p_beta,
const OutGridDesc, const OutGridDesc,
const AccDataType epsilon, const AccDataType epsilon,
const ElementwiseOperation elementwise_op) const EmbElementwiseOperation emb_elementwise_op)
{ {
const index_t thread_local_id = get_thread_local_1d_id(); const index_t thread_local_id = get_thread_local_1d_id();
const index_t block_global_id = get_block_1d_id(); const index_t block_global_id = get_block_1d_id();
...@@ -189,7 +189,7 @@ struct GridwiseSparseEmbeddingsForwardLayernorm ...@@ -189,7 +189,7 @@ struct GridwiseSparseEmbeddingsForwardLayernorm
return acc_thread_buf(Number<register_offset>{}); return acc_thread_buf(Number<register_offset>{});
}, },
Number<1>{}); Number<1>{});
unpack2(elementwise_op, out_data_refs, in_data_refs); unpack2(emb_elementwise_op, out_data_refs, in_data_refs);
}); });
}); });
}; };
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment