Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
65c56e56
Commit
65c56e56
authored
Jul 25, 2022
by
Chao Liu
Browse files
update Tensor
parent
028171e9
Changes
60
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
88 additions
and
85 deletions
+88
-85
example/16_gemm_reduce/gemm_reduce_xdl_mean_squaremean_fp16.cpp
...e/16_gemm_reduce/gemm_reduce_xdl_mean_squaremean_fp16.cpp
+5
-5
example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
+3
-3
example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
...e/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
+5
-5
example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
+3
-3
example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
+3
-3
example/19_binary_elementwise/elementwise_add_1d.cpp
example/19_binary_elementwise/elementwise_add_1d.cpp
+3
-3
example/19_binary_elementwise/elementwise_add_4d.cpp
example/19_binary_elementwise/elementwise_add_4d.cpp
+3
-3
example/20_convnd_bwd_weight/convnd_bwd_weight_common.hpp
example/20_convnd_bwd_weight/convnd_bwd_weight_common.hpp
+3
-3
example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_fp16.cpp
..._gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_fp16.cpp
+11
-10
example/21_gemm_layernorm/gemm_layernorm_xdl_fp16.cpp
example/21_gemm_layernorm/gemm_layernorm_xdl_fp16.cpp
+9
-8
example/21_gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp
..._gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp
+7
-7
example/22_cgemm/cgemm_xdl_fp16.cpp
example/22_cgemm/cgemm_xdl_fp16.cpp
+6
-6
example/23_softmax/softmax_blockwise.cpp
example/23_softmax/softmax_blockwise.cpp
+3
-3
example/24_batched_gemm_c_permute/batched_gemm_c_permute_xdl_fp16.cpp
...atched_gemm_c_permute/batched_gemm_c_permute_xdl_fp16.cpp
+4
-3
example/25_gemm_bias_c_permute/gemm_bias_c_permute_xdl_fp16.cpp
...e/25_gemm_bias_c_permute/gemm_bias_c_permute_xdl_fp16.cpp
+5
-5
example/26_contraction/contraction_bilinear_xdl_fp32.cpp
example/26_contraction/contraction_bilinear_xdl_fp32.cpp
+4
-4
example/26_contraction/contraction_scale_xdl_fp32.cpp
example/26_contraction/contraction_scale_xdl_fp32.cpp
+3
-3
example/27_layernorm/layernorm_blockwise.cpp
example/27_layernorm/layernorm_blockwise.cpp
+4
-4
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
...ary/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+1
-1
library/include/ck/library/utility/op_instance_engine.hpp
library/include/ck/library/utility/op_instance_engine.hpp
+3
-3
No files found.
example/16_gemm_reduce/gemm_reduce_xdl_mean_squaremean_fp16.cpp
View file @
65c56e56
...
...
@@ -188,13 +188,13 @@ int main(int argc, char* argv[])
break
;
}
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
reduce0_device_buf
(
sizeof
(
ReduceDataType
)
*
reduce0_m_device_result
.
mDesc
.
GetElementSpace
());
reduce0_m_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
reduce1_device_buf
(
sizeof
(
ReduceDataType
)
*
reduce1_m_device_result
.
mDesc
.
GetElementSpace
());
reduce1_m_device_result
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_m_k
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_k_n
.
mData
.
data
());
...
...
example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
View file @
65c56e56
...
...
@@ -135,9 +135,9 @@ int run_conv_bwd_data(bool do_verification,
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
}
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in_device
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpace
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
());
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in_device
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
Size
());
out_device_buf
.
ToDevice
(
out
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
...
...
example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
View file @
65c56e56
...
...
@@ -174,13 +174,13 @@ int main(int argc, char* argv[])
break
;
}
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_g_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_g_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_g_m_n_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_g_m_k
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_g_k_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_g_m_n_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
reduce0_device_buf
(
sizeof
(
ReduceDataType
)
*
d0_g_m_device_result
.
mDesc
.
GetElementSpace
());
d0_g_m_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
reduce1_device_buf
(
sizeof
(
ReduceDataType
)
*
d1_g_m_device_result
.
mDesc
.
GetElementSpace
());
d1_g_m_device_result
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_g_m_k
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_g_k_n
.
mData
.
data
());
...
...
example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
View file @
65c56e56
...
...
@@ -92,9 +92,9 @@ int main()
a_m_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
b_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
DeviceMem
a_m_n_device_buf
(
sizeof
(
ABDataType
)
*
a_m_n
.
mDesc
.
GetElementSpace
());
DeviceMem
b_n_device_buf
(
sizeof
(
ABDataType
)
*
b_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_m_n_device_buf
(
sizeof
(
CDataType
)
*
c_m_n
.
mDesc
.
GetElementSpace
());
DeviceMem
a_m_n_device_buf
(
sizeof
(
ABDataType
)
*
a_m_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_n_device_buf
(
sizeof
(
ABDataType
)
*
b_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_m_n_device_buf
(
sizeof
(
CDataType
)
*
c_m_n
.
mDesc
.
GetElementSpace
Size
());
a_m_n_device_buf
.
ToDevice
(
a_m_n
.
mData
.
data
());
b_n_device_buf
.
ToDevice
(
b_n
.
mData
.
data
());
...
...
example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
View file @
65c56e56
...
...
@@ -74,9 +74,9 @@ int main()
a_m
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
b_m_n_k
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
DeviceMem
a_m_device_buf
(
sizeof
(
ABDataType
)
*
a_m
.
mDesc
.
GetElementSpace
());
DeviceMem
b_m_n_k_device_buf
(
sizeof
(
ABDataType
)
*
b_m_n_k
.
mDesc
.
GetElementSpace
());
DeviceMem
c_m_n_k_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_k
.
mDesc
.
GetElementSpace
());
DeviceMem
a_m_device_buf
(
sizeof
(
ABDataType
)
*
a_m
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_m_n_k_device_buf
(
sizeof
(
ABDataType
)
*
b_m_n_k
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_m_n_k_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_k
.
mDesc
.
GetElementSpace
Size
());
a_m_device_buf
.
ToDevice
(
a_m
.
mData
.
data
());
b_m_n_k_device_buf
.
ToDevice
(
b_m_n_k
.
mData
.
data
());
...
...
example/19_binary_elementwise/elementwise_add_1d.cpp
View file @
65c56e56
...
...
@@ -72,9 +72,9 @@ int main()
a_m
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
b_m
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
DeviceMem
a_m_device_buf
(
sizeof
(
ABDataType
)
*
a_m
.
mDesc
.
GetElementSpace
());
DeviceMem
b_m_device_buf
(
sizeof
(
ABDataType
)
*
b_m
.
mDesc
.
GetElementSpace
());
DeviceMem
c_m_device_buf
(
sizeof
(
CDataType
)
*
c_m
.
mDesc
.
GetElementSpace
());
DeviceMem
a_m_device_buf
(
sizeof
(
ABDataType
)
*
a_m
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_m_device_buf
(
sizeof
(
ABDataType
)
*
b_m
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_m_device_buf
(
sizeof
(
CDataType
)
*
c_m
.
mDesc
.
GetElementSpace
Size
());
a_m_device_buf
.
ToDevice
(
a_m
.
mData
.
data
());
b_m_device_buf
.
ToDevice
(
b_m
.
mData
.
data
());
...
...
example/19_binary_elementwise/elementwise_add_4d.cpp
View file @
65c56e56
...
...
@@ -74,9 +74,9 @@ int main()
a
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
b
.
GenerateTensorValue
(
GeneratorTensor_3
<
ABDataType
>
{
0.0
,
1.0
});
DeviceMem
a_device_buf
(
sizeof
(
ABDataType
)
*
a
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
ABDataType
)
*
b
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ABDataType
)
*
a
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_device_buf
(
sizeof
(
ABDataType
)
*
b
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b
.
mData
.
data
());
...
...
example/20_convnd_bwd_weight/convnd_bwd_weight_common.hpp
View file @
65c56e56
...
...
@@ -136,9 +136,9 @@ int run_conv_bwd_weight(bool do_verification,
out
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
-
0.5
,
0.5
});
}
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
());
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
Size
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
out_device_buf
.
ToDevice
(
out
.
mData
.
data
());
...
...
example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_fp16.cpp
View file @
65c56e56
...
...
@@ -281,18 +281,19 @@ int main()
gamma_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
GammaDataType
>
{
-
1
,
1
});
beta_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
BetaDataType
>
{
-
1
,
1
});
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n
.
mDesc
.
GetElementSpace
());
DeviceMem
bias_device_buf
(
sizeof
(
BiasDataType
)
*
bias_n
.
mDesc
.
GetElementSpace
());
DeviceMem
d0_device_buf
(
sizeof
(
D0DataType
)
*
c1_m_n
.
mDesc
.
GetElementSpace
());
DeviceMem
reduceMean_device_buf
(
sizeof
(
ReduceDataType
)
*
reduceMean_m
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
bias_device_buf
(
sizeof
(
BiasDataType
)
*
bias_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
d0_device_buf
(
sizeof
(
D0DataType
)
*
c1_m_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
reduceMean_device_buf
(
sizeof
(
ReduceDataType
)
*
reduceMean_m
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
reduceMeanSquare_device_buf
(
sizeof
(
ReduceDataType
)
*
reduceMeanSquare_m
.
mDesc
.
GetElementSpace
());
DeviceMem
gamma_device_buf
(
sizeof
(
GammaDataType
)
*
gamma_n
.
mDesc
.
GetElementSpace
());
DeviceMem
beta_device_buf
(
sizeof
(
BetaDataType
)
*
beta_n
.
mDesc
.
GetElementSpace
());
reduceMeanSquare_m
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
gamma_device_buf
(
sizeof
(
GammaDataType
)
*
gamma_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
beta_device_buf
(
sizeof
(
BetaDataType
)
*
beta_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
layerNorm_device_buf
(
sizeof
(
LayerNormOutDataType
)
*
layerNorm_m_n
.
mDesc
.
GetElementSpace
());
layerNorm_m_n
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_m_k
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_k_n
.
mData
.
data
());
...
...
example/21_gemm_layernorm/gemm_layernorm_xdl_fp16.cpp
View file @
65c56e56
...
...
@@ -249,16 +249,17 @@ int main()
gamma_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
GammaDataType
>
{
-
1
,
1
});
beta_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
BetaDataType
>
{
-
1
,
1
});
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n
.
mDesc
.
GetElementSpace
());
DeviceMem
reduceMean_device_buf
(
sizeof
(
ReduceDataType
)
*
reduceMean_m
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
reduceMean_device_buf
(
sizeof
(
ReduceDataType
)
*
reduceMean_m
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
reduceMeanSquare_device_buf
(
sizeof
(
ReduceDataType
)
*
reduceMeanSquare_m
.
mDesc
.
GetElementSpace
());
DeviceMem
gamma_device_buf
(
sizeof
(
GammaDataType
)
*
gamma_n
.
mDesc
.
GetElementSpace
());
DeviceMem
beta_device_buf
(
sizeof
(
BetaDataType
)
*
beta_n
.
mDesc
.
GetElementSpace
());
reduceMeanSquare_m
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
gamma_device_buf
(
sizeof
(
GammaDataType
)
*
gamma_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
beta_device_buf
(
sizeof
(
BetaDataType
)
*
beta_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
layerNorm_device_buf
(
sizeof
(
LayerNormOutDataType
)
*
layerNorm_m_n
.
mDesc
.
GetElementSpace
());
layerNorm_m_n
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_m_k
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_k_n
.
mData
.
data
());
...
...
example/21_gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp
View file @
65c56e56
...
...
@@ -185,13 +185,13 @@ int main(int argc, char* argv[])
c_m_n_host_result
.
GenerateTensorValue
(
GeneratorTensor_1
<
CDataType
>
{
0
});
acc_m_n_host_result
.
GenerateTensorValue
(
GeneratorTensor_1
<
AccDataType
>
{
0
});
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
c0_bias_buf
(
sizeof
(
C0DataType
)
*
c0_n_bias
.
mDesc
.
GetElementSpace
());
DeviceMem
c0_add_buf
(
sizeof
(
C0DataType
)
*
c0_m_n_add
.
mDesc
.
GetElementSpace
());
DeviceMem
c0_gamma_buf
(
sizeof
(
C0DataType
)
*
c0_n_gamma
.
mDesc
.
GetElementSpace
());
DeviceMem
c0_beta_buf
(
sizeof
(
C0DataType
)
*
c0_n_beta
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c0_bias_buf
(
sizeof
(
C0DataType
)
*
c0_n_bias
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c0_add_buf
(
sizeof
(
C0DataType
)
*
c0_m_n_add
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c0_gamma_buf
(
sizeof
(
C0DataType
)
*
c0_n_gamma
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c0_beta_buf
(
sizeof
(
C0DataType
)
*
c0_n_beta
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_m_k
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_k_n
.
mData
.
data
());
...
...
example/22_cgemm/cgemm_xdl_fp16.cpp
View file @
65c56e56
...
...
@@ -177,14 +177,14 @@ int main(int argc, char* argv[])
auto
cgemm
=
DeviceCGemmInstance
{};
DeviceMem
a_m_k_real_device_buf
(
sizeof
(
ADataType
)
*
a_m_k_real
.
mDesc
.
GetElementSpace
());
DeviceMem
a_m_k_imag_device_buf
(
sizeof
(
ADataType
)
*
a_m_k_imag
.
mDesc
.
GetElementSpace
());
DeviceMem
b_k_n_real_device_buf
(
sizeof
(
BDataType
)
*
b_k_n_real
.
mDesc
.
GetElementSpace
());
DeviceMem
b_k_n_imag_device_buf
(
sizeof
(
BDataType
)
*
b_k_n_imag
.
mDesc
.
GetElementSpace
());
DeviceMem
a_m_k_real_device_buf
(
sizeof
(
ADataType
)
*
a_m_k_real
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
a_m_k_imag_device_buf
(
sizeof
(
ADataType
)
*
a_m_k_imag
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_k_n_real_device_buf
(
sizeof
(
BDataType
)
*
b_k_n_real
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_k_n_imag_device_buf
(
sizeof
(
BDataType
)
*
b_k_n_imag
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_m_n_real_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_real_device_result
.
mDesc
.
GetElementSpace
());
c_m_n_real_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
c_m_n_imag_device_buf
(
sizeof
(
CDataType
)
*
c_m_n_imag_device_result
.
mDesc
.
GetElementSpace
());
c_m_n_imag_device_result
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
workspace_device_buf
(
cgemm
.
GetWorkspaceSize
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
));
a_m_k_real_device_buf
.
ToDevice
(
a_m_k_real
.
mData
.
data
());
...
...
example/23_softmax/softmax_blockwise.cpp
View file @
65c56e56
...
...
@@ -177,7 +177,7 @@ int main(int argc, char* argv[])
}
if
(
beta
!=
0.0
f
)
for
(
size_t
i
=
0
;
i
<
out_ref
.
mDesc
.
GetElementSpace
();
i
++
)
for
(
size_t
i
=
0
;
i
<
out_ref
.
mDesc
.
GetElementSpace
Size
();
i
++
)
out
.
mData
[
i
]
=
out_ref
.
mData
[
i
];
};
// std::cout << "beta = " << beta << std::endl;
...
...
@@ -185,8 +185,8 @@ int main(int argc, char* argv[])
// LogRangeAsType<float>(std::cout << "tensor prior out: " , out.mData, ",") << std::endl;
// these buffers are usually provided by the user application
DeviceMem
in_dev
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
());
DeviceMem
out_dev
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
());
DeviceMem
in_dev
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
out_dev
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
Size
());
in_dev
.
ToDevice
(
in
.
mData
.
data
());
...
...
example/24_batched_gemm_c_permute/batched_gemm_c_permute_xdl_fp16.cpp
View file @
65c56e56
...
...
@@ -154,9 +154,10 @@ int main(int argc, char* argv[])
break
;
}
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_g_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_g_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_g0_g1_m_n_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_g_m_k
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_g_k_n
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
c_device_buf
(
sizeof
(
CDataType
)
*
c_g0_g1_m_n_device_result
.
mDesc
.
GetElementSpaceSize
());
a_device_buf
.
ToDevice
(
a_g_m_k
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_g_k_n
.
mData
.
data
());
...
...
example/25_gemm_bias_c_permute/gemm_bias_c_permute_xdl_fp16.cpp
View file @
65c56e56
...
...
@@ -186,12 +186,12 @@ int main(int argc, char* argv[])
d_m0_m1_m2_n0_n1
.
GenerateTensorValue
(
GeneratorTensor_3
<
DDataType
>
{
0.0
,
1.0
});
}
DeviceMem
a_m_k_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
());
DeviceMem
b_k_n_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
());
DeviceMem
a_m_k_device_buf
(
sizeof
(
ADataType
)
*
a_m_k
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_k_n_device_buf
(
sizeof
(
BDataType
)
*
b_k_n
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
d_m0_m1_m2_n0_n1_device_buf
(
sizeof
(
DDataType
)
*
d_m0_m1_m2_n0_n1
.
mDesc
.
GetElementSpace
());
DeviceMem
e_m0_m1_m2_n0_n1_device_buf
(
sizeof
(
EDataType
)
*
e_m0_m1_m2_n0_n1_device_result
.
mDesc
.
GetElementSpace
());
d_m0_m1_m2_n0_n1
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
e_m0_m1_m2_n0_n1_device_buf
(
sizeof
(
EDataType
)
*
e_m0_m1_m2_n0_n1_device_result
.
mDesc
.
GetElementSpace
Size
());
a_m_k_device_buf
.
ToDevice
(
a_m_k
.
mData
.
data
());
b_k_n_device_buf
.
ToDevice
(
b_k_n
.
mData
.
data
());
...
...
example/26_contraction/contraction_bilinear_xdl_fp32.cpp
View file @
65c56e56
...
...
@@ -324,10 +324,10 @@ int main(int argc, char* argv[])
break
;
}
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_ms_ks
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_ns_ks
.
mDesc
.
GetElementSpace
());
DeviceMem
d_device_buf
(
sizeof
(
DDataType
)
*
d_ms_ns
.
mDesc
.
GetElementSpace
());
DeviceMem
e_device_buf
(
sizeof
(
EDataType
)
*
e_ms_ns_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_ms_ks
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_ns_ks
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
d_device_buf
(
sizeof
(
DDataType
)
*
d_ms_ns
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
e_device_buf
(
sizeof
(
EDataType
)
*
e_ms_ns_device_result
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_ms_ks
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_ns_ks
.
mData
.
data
());
...
...
example/26_contraction/contraction_scale_xdl_fp32.cpp
View file @
65c56e56
...
...
@@ -307,9 +307,9 @@ int main(int argc, char* argv[])
break
;
}
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_ms_ks
.
mDesc
.
GetElementSpace
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_ns_ks
.
mDesc
.
GetElementSpace
());
DeviceMem
e_device_buf
(
sizeof
(
EDataType
)
*
e_ms_ns_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a_ms_ks
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b_ns_ks
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
e_device_buf
(
sizeof
(
EDataType
)
*
e_ms_ns_device_result
.
mDesc
.
GetElementSpace
Size
());
a_device_buf
.
ToDevice
(
a_ms_ks
.
mData
.
data
());
b_device_buf
.
ToDevice
(
b_ns_ks
.
mData
.
data
());
...
...
example/27_layernorm/layernorm_blockwise.cpp
View file @
65c56e56
...
...
@@ -75,10 +75,10 @@ int main()
gamma
.
GenerateTensorValue
(
GeneratorTensor_3
<
GammaDataType
>
{
0.0
,
1.0
});
beta
.
GenerateTensorValue
(
GeneratorTensor_3
<
BetaDataType
>
{
0.0
,
1.0
});
DeviceMem
x_dev
(
sizeof
(
XDataType
)
*
x
.
mDesc
.
GetElementSpace
());
DeviceMem
gamma_dev
(
sizeof
(
GammaDataType
)
*
gamma
.
mDesc
.
GetElementSpace
());
DeviceMem
beta_dev
(
sizeof
(
BetaDataType
)
*
beta
.
mDesc
.
GetElementSpace
());
DeviceMem
y_dev
(
sizeof
(
YDataType
)
*
y
.
mDesc
.
GetElementSpace
());
DeviceMem
x_dev
(
sizeof
(
XDataType
)
*
x
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
gamma_dev
(
sizeof
(
GammaDataType
)
*
gamma
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
beta_dev
(
sizeof
(
BetaDataType
)
*
beta
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
y_dev
(
sizeof
(
YDataType
)
*
y
.
mDesc
.
GetElementSpace
Size
());
x_dev
.
ToDevice
(
x
.
mData
.
data
());
gamma_dev
.
ToDevice
(
gamma
.
mData
.
data
());
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
View file @
65c56e56
...
...
@@ -91,7 +91,7 @@ struct ReferenceConvFwd : public device::BaseOperator
arg
.
weight_
.
GetNumOfDimension
()
==
NDimSpatial
+
3
&&
arg
.
output_
.
GetNumOfDimension
()
==
NDimSpatial
+
3
))
{
std
::
th
ro
w
(
"wrong! inconsistent dimension"
);
throw
std
::
runtime_er
ro
r
(
"wrong! inconsistent dimension"
);
}
if
constexpr
(
NDimSpatial
==
1
)
...
...
library/include/ck/library/utility/op_instance_engine.hpp
View file @
65c56e56
...
...
@@ -103,8 +103,8 @@ class OpInstanceRunEngine
}
}
AllocateDeviceInputTensors
(
std
::
make_index_sequence
<
kNInArgs_
>
{});
out_device_buffer_
=
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
OutDataType
)
*
out_tensor_
->
mDesc
.
GetElementSpace
());
out_device_buffer_
=
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
OutDataType
)
*
out_tensor_
->
mDesc
.
GetElementSpace
Size
());
out_device_buffer_
->
SetZero
();
}
...
...
@@ -222,7 +222,7 @@ class OpInstanceRunEngine
in_device_buffers_
.
emplace_back
(
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
std
::
tuple_element_t
<
Index
,
InArgsTypesTuple
>
)
*
ts
->
mDesc
.
GetElementSpace
()))
ts
->
mDesc
.
GetElementSpace
Size
()))
->
ToDevice
(
ts
->
mData
.
data
());
}
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment