Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
451aef90
"vscode:/vscode.git/clone" did not exist on "693a0d08e4cb59f3649d492a8741d97d22c387d9"
Commit
451aef90
authored
Aug 22, 2022
by
Rosty Geyyer
Browse files
Add int4 example for convnd_fwd_bias_relu_add
parent
9efd033b
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
647 additions
and
105 deletions
+647
-105
example/30_grouped_convnd_fwd_bias_relu_add/CMakeLists.txt
example/30_grouped_convnd_fwd_bias_relu_add/CMakeLists.txt
+6
-1
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
...bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
+26
-23
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp
+35
-20
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp
+35
-20
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp
+35
-20
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
+459
-0
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
+35
-20
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
...k/tensor_operation/gpu/element/element_wise_operation.hpp
+10
-0
library/include/ck/library/utility/check_err.hpp
library/include/ck/library/utility/check_err.hpp
+6
-1
No files found.
example/30_grouped_convnd_fwd_bias_relu_add/CMakeLists.txt
View file @
451aef90
...
@@ -8,4 +8,9 @@ add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 grouped
...
@@ -8,4 +8,9 @@ add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 grouped
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 PRIVATE utility
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 PRIVATE utility
)
add_example_executable
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
)
add_example_executable
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 PRIVATE utility
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 PRIVATE utility
)
\ No newline at end of file
if
(
USE_BITINT_EXTENSION_INT4
)
add_example_executable
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int4 grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int4 PRIVATE utility
)
endif
()
# USE_BITINT_EXTENSION_INT4
\ No newline at end of file
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
View file @
451aef90
...
@@ -26,13 +26,16 @@ void print_helper_msg()
...
@@ -26,13 +26,16 @@ void print_helper_msg()
}
}
template
<
ck
::
index_t
NDimSpatial
,
template
<
ck
::
index_t
NDimSpatial
,
typename
InDataType
,
typename
In
Kernel
DataType
,
typename
WeiDataType
,
typename
Wei
Kernel
DataType
,
typename
CShuffleDataType
,
typename
CShuffleDataType
,
typename
OutDataType
,
typename
Out
Kernel
DataType
,
typename
InElementOp
,
typename
InElementOp
,
typename
WeiElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
OutElementOp
,
typename
InUserDataType
,
typename
WeiUserDataType
,
typename
OutUserDataType
,
typename
DeviceConvNDFwdInstance
>
typename
DeviceConvNDFwdInstance
>
int
run_grouped_conv_fwd_bias_relu_add
(
bool
do_verification
,
int
run_grouped_conv_fwd_bias_relu_add
(
bool
do_verification
,
int
init_method
,
int
init_method
,
...
@@ -47,12 +50,12 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -47,12 +50,12 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
const
WeiElementOp
&
wei_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
const
OutElementOp
&
out_element_op
)
{
{
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
In
User
DataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
Wei
User
DataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
OutDataType
>
bias
(
bias_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
bias
(
bias_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
residual
(
residual_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
residual
(
residual_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
out_host
(
out_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
out_host
(
out_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
out_device
(
out_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
out_device
(
out_g_n_k_wos_desc
);
std
::
cout
<<
"in: "
<<
in
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"in: "
<<
in
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
...
@@ -64,21 +67,21 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -64,21 +67,21 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
{
{
case
0
:
break
;
case
0
:
break
;
case
1
:
case
1
:
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
5
,
5
});
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
In
User
DataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
Wei
User
DataType
>
{
-
5
,
5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_2
<
Out
User
DataType
>
{
-
5
,
5
});
break
;
break
;
default:
default:
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
InDataType
>
{
0.0
,
1.0
});
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
In
User
DataType
>
{
0.0
,
1.0
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
Wei
User
DataType
>
{
-
0.5
,
0.5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
-
0.5
,
0.5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_3
<
Out
User
DataType
>
{
-
0.5
,
0.5
});
}
}
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
in_device_buf
(
sizeof
(
In
User
DataType
)
*
in
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
Wei
User
DataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
bias_device_buf
(
sizeof
(
OutDataType
)
*
bias
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
bias_device_buf
(
sizeof
(
Out
User
DataType
)
*
bias
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
residual_device_buf
(
sizeof
(
OutDataType
)
*
residual
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
residual_device_buf
(
sizeof
(
Out
User
DataType
)
*
residual
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out_device
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
out_device_buf
(
sizeof
(
Out
User
DataType
)
*
out_device
.
mDesc
.
GetElementSpaceSize
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
...
@@ -154,7 +157,7 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -154,7 +157,7 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
conv_param
.
GetFlops
();
std
::
size_t
flop
=
conv_param
.
GetFlops
();
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
();
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
In
User
DataType
,
Wei
User
DataType
,
Out
User
DataType
>
();
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
...
@@ -168,8 +171,8 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -168,8 +171,8 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
Tensor
<
CShuffleDataType
>
c_host
(
out_g_n_k_wos_desc
);
Tensor
<
CShuffleDataType
>
c_host
(
out_g_n_k_wos_desc
);
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
InDataType
,
In
User
DataType
,
WeiDataType
,
Wei
User
DataType
,
CShuffleDataType
,
CShuffleDataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
ck
::
bhalf_t
;
// kernel data types
using
WeiDataType
=
ck
::
bhalf_t
;
using
InKernelDataType
=
ck
::
bhalf_t
;
using
AccDataType
=
float
;
using
WeiKernelDataType
=
ck
::
bhalf_t
;
using
CShuffleDataType
=
float
;
using
AccDataType
=
float
;
using
BiasDataType
=
ck
::
bhalf_t
;
using
CShuffleDataType
=
float
;
using
ResidualDataType
=
ck
::
bhalf_t
;
using
BiasKernelDataType
=
ck
::
bhalf_t
;
using
OutDataType
=
ck
::
bhalf_t
;
using
ResidualKernelDataType
=
ck
::
bhalf_t
;
using
OutKernelDataType
=
ck
::
bhalf_t
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
ck
::
half_t
;
// kernel data types
using
WeiDataType
=
ck
::
half_t
;
using
InKernelDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
WeiKernelDataType
=
ck
::
half_t
;
using
CShuffleDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
BiasDataType
=
ck
::
half_t
;
using
CShuffleDataType
=
ck
::
half_t
;
using
ResidualDataType
=
ck
::
half_t
;
using
BiasKernelDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
using
ResidualKernelDataType
=
ck
::
half_t
;
using
OutKernelDataType
=
ck
::
half_t
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
float
;
// kernel data types
using
WeiDataType
=
float
;
using
InKernelDataType
=
float
;
using
AccDataType
=
float
;
using
WeiKernelDataType
=
float
;
using
CShuffleDataType
=
float
;
using
AccDataType
=
float
;
using
BiasDataType
=
float
;
using
CShuffleDataType
=
float
;
using
ResidualDataType
=
float
;
using
BiasKernelDataType
=
float
;
using
OutDataType
=
float
;
using
ResidualKernelDataType
=
float
;
using
OutKernelDataType
=
float
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
0 → 100644
View file @
451aef90
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "grouped_convnd_fwd_bias_relu_add_common.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
// kernel data types
using
InKernelDataType
=
int8_t
;
using
WeiKernelDataType
=
int8_t
;
using
AccDataType
=
int32_t
;
using
CShuffleDataType
=
int8_t
;
using
BiasKernelDataType
=
int8_t
;
using
ResidualKernelDataType
=
int8_t
;
using
OutKernelDataType
=
int8_t
;
// tensor data types
using
InUserDataType
=
ck
::
int4_t
;
using
WeiUserDataType
=
ck
::
int4_t
;
using
OutUserDataType
=
ck
::
int4_t
;
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
AddReluAdd
;
static
constexpr
auto
ConvSpec
=
ck
::
tensor_operation
::
device
::
ConvolutionForwardSpecialization
::
Default
;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
BiasLayout
,
typename
ResidualLayout
,
typename
OutLayout
>
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
NDimSpatial
,
InLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
InKernelDataType
,
WeiKernelDataType
,
AccDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasKernelDataType
,
ResidualKernelDataType
>
,
OutKernelDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
// ConvForwardSpecialization
GemmSpec
,
// GemmSpecialization
1
,
//
256
,
// BlockSize
128
,
// MPerBlock
256
,
// NPerBlock
64
,
// KPerBlock
16
,
// AK1
16
,
// BK1
32
,
// MPerXdl
32
,
// NPerXdl
2
,
// MXdlPerWave
4
,
// NXdlPerWave
S
<
4
,
64
,
1
>
,
// ABlockTransferThreadClusterLengths_AK0_M_AK1
S
<
1
,
0
,
2
>
,
// ABlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// ABlockTransferSrcAccessOrder
2
,
// ABlockTransferSrcVectorDim
16
,
// ABlockTransferSrcScalarPerVector
16
,
// ABlockTransferDstScalarPerVector_AK1
1
,
// ABlockLdsExtraM
S
<
4
,
64
,
1
>
,
// BBlockTransferThreadClusterLengths_BK0_N_BK1
S
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder
2
,
// BBlockTransferSrcVectorDim
16
,
// BBlockTransferSrcScalarPerVector
16
,
// BBlockTransferDstScalarPerVector_BK1
1
,
// BBlockLdsExtraN
1
,
1
,
S
<
1
,
64
,
1
,
4
>
,
16
>
;
int
main
(
int
argc
,
char
*
argv
[])
{
namespace
ctc
=
ck
::
tensor_layout
::
convolution
;
print_helper_msg
();
bool
do_verification
=
true
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
// conventional group conv definition
// G = 2
// [N, C, Hi, Wi] = [128, 384, 71, 71]
// [K, C, Y, X] = [512, 192, 3, 3]
// [N, K, Ho, Wo] = [128, 512, 36, 36]
// CK group conv definition
// [G, N, C, Hi, Wi] = [2, 128, 192, 71, 71]
// [G, K, C, Y, X] = [2, 256, 192, 3, 3]
// [G, N, K, Ho, Wo] = [2, 128, 256, 36, 36]
ck
::
utils
::
conv
::
ConvParam
conv_param
{
2
,
2
,
128
,
256
,
192
,
{
3
,
3
},
{
71
,
71
},
{
2
,
2
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}};
if
(
argc
==
1
)
{
// use default
}
else
if
(
argc
==
4
)
{
do_verification
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
time_kernel
=
std
::
stoi
(
argv
[
3
]);
}
else
{
do_verification
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
time_kernel
=
std
::
stoi
(
argv
[
3
]);
const
ck
::
index_t
num_dim_spatial
=
std
::
stoi
(
argv
[
4
]);
conv_param
=
ck
::
utils
::
conv
::
parse_conv_param
(
num_dim_spatial
,
5
,
argv
);
}
const
auto
in_element_op
=
InElementOp
{};
const
auto
wei_element_op
=
WeiElementOp
{};
const
auto
out_element_op
=
OutElementOp
{};
if
(
conv_param
.
num_dim_spatial_
==
1
)
{
using
InLayout
=
ctc
::
G_NW_C
;
using
WeiLayout
=
ctc
::
G_K_X_C
;
using
BiasLayout
=
ctc
::
G_NW_K
;
using
ResidualLayout
=
ctc
::
G_NW_K
;
using
OutLayout
=
ctc
::
G_NW_K
;
const
auto
in_g_n_c_wis_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
C_
,
conv_param
.
input_spatial_lengths_
[
0
]},
{
conv_param
.
C_
,
// g
conv_param
.
input_spatial_lengths_
[
0
]
*
conv_param
.
G_
*
conv_param
.
C_
,
// n
1
,
// c
conv_param
.
G_
*
conv_param
.
C_
// wi
});
const
auto
wei_g_k_c_xs_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
K_
,
conv_param
.
C_
,
conv_param
.
filter_spatial_lengths_
[
0
]},
{
conv_param
.
K_
*
conv_param
.
filter_spatial_lengths_
[
0
]
*
conv_param
.
C_
,
// g
conv_param
.
filter_spatial_lengths_
[
0
]
*
conv_param
.
C_
,
// k
1
,
// c
conv_param
.
C_
// x
});
const
auto
bias_g_n_k_wos_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
]},
{
conv_param
.
K_
,
// g
0
,
// k
1
,
// c
0
// x
});
const
auto
residual_g_n_k_wos_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
]},
{
conv_param
.
K_
,
// g
0
,
// k
1
,
// c
0
// x
});
const
auto
out_g_n_k_wos_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
]},
{
conv_param
.
K_
,
// g
conv_param
.
output_spatial_lengths_
[
0
]
*
conv_param
.
G_
*
conv_param
.
K_
,
// n
1
,
// k
conv_param
.
G_
*
conv_param
.
K_
// wo
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InKernelDataType
,
WeiKernelDataType
,
CShuffleDataType
,
OutKernelDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
WeiLayout
,
BiasLayout
,
ResidualLayout
,
OutLayout
>>
(
do_verification
,
init_method
,
time_kernel
,
conv_param
,
in_g_n_c_wis_desc
,
wei_g_k_c_xs_desc
,
bias_g_n_k_wos_desc
,
residual_g_n_k_wos_desc
,
out_g_n_k_wos_desc
,
in_element_op
,
wei_element_op
,
out_element_op
);
}
else
if
(
conv_param
.
num_dim_spatial_
==
2
)
{
using
InLayout
=
ctc
::
G_NHW_C
;
using
WeiLayout
=
ctc
::
G_K_YX_C
;
using
BiasLayout
=
ctc
::
G_NHW_K
;
using
ResidualLayout
=
ctc
::
G_NHW_K
;
using
OutLayout
=
ctc
::
G_NHW_K
;
const
auto
in_g_n_c_wis_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
C_
,
conv_param
.
input_spatial_lengths_
[
0
],
conv_param
.
input_spatial_lengths_
[
1
]},
{
conv_param
.
C_
,
// g
conv_param
.
input_spatial_lengths_
[
0
]
*
conv_param
.
input_spatial_lengths_
[
1
]
*
conv_param
.
G_
*
conv_param
.
C_
,
// n
1
,
// c
conv_param
.
input_spatial_lengths_
[
1
]
*
conv_param
.
G_
*
conv_param
.
C_
,
// hi
conv_param
.
G_
*
conv_param
.
C_
// wi
});
const
auto
wei_g_k_c_xs_desc
=
HostTensorDescriptor
({
conv_param
.
G_
,
conv_param
.
K_
,
conv_param
.
C_
,
conv_param
.
filter_spatial_lengths_
[
0
],
conv_param
.
filter_spatial_lengths_
[
1
]},
{
conv_param
.
K_
*
conv_param
.
filter_spatial_lengths_
[
0
]
*
conv_param
.
filter_spatial_lengths_
[
1
]
*
conv_param
.
C_
,
// g
conv_param
.
filter_spatial_lengths_
[
0
]
*
conv_param
.
filter_spatial_lengths_
[
1
]
*
conv_param
.
C_
,
// k
1
,
// c
conv_param
.
filter_spatial_lengths_
[
1
]
*
conv_param
.
C_
,
// y
conv_param
.
C_
// x
});
const
auto
bias_g_n_k_wos_desc
=
HostTensorDescriptor
({
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
],
conv_param
.
output_spatial_lengths_
[
1
]},
{
conv_param
.
K_
,
// g
0
,
// n
1
,
// k
0
,
// ho
0
// wo
});
const
auto
residual_g_n_k_wos_desc
=
HostTensorDescriptor
({
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
],
conv_param
.
output_spatial_lengths_
[
1
]},
{
conv_param
.
K_
,
// g
0
,
// n
1
,
// k
0
,
// ho
0
// wo
});
const
auto
out_g_n_k_wos_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
],
conv_param
.
output_spatial_lengths_
[
1
]},
{
conv_param
.
K_
,
// g
conv_param
.
output_spatial_lengths_
[
0
]
*
conv_param
.
output_spatial_lengths_
[
1
]
*
conv_param
.
G_
*
conv_param
.
K_
,
// n
1
,
// k
conv_param
.
output_spatial_lengths_
[
1
]
*
conv_param
.
G_
*
conv_param
.
K_
,
// ho
conv_param
.
G_
*
conv_param
.
K_
// wo
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InKernelDataType
,
WeiKernelDataType
,
CShuffleDataType
,
OutKernelDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
WeiLayout
,
BiasLayout
,
ResidualLayout
,
OutLayout
>>
(
do_verification
,
init_method
,
time_kernel
,
conv_param
,
in_g_n_c_wis_desc
,
wei_g_k_c_xs_desc
,
bias_g_n_k_wos_desc
,
residual_g_n_k_wos_desc
,
out_g_n_k_wos_desc
,
in_element_op
,
wei_element_op
,
out_element_op
);
}
else
if
(
conv_param
.
num_dim_spatial_
==
3
)
{
using
InLayout
=
ctc
::
G_NDHW_C
;
using
WeiLayout
=
ctc
::
G_K_ZYX_C
;
using
BiasLayout
=
ctc
::
G_NDHW_K
;
using
ResidualLayout
=
ctc
::
G_NDHW_K
;
using
OutLayout
=
ctc
::
G_NDHW_K
;
const
auto
in_g_n_c_wis_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
C_
,
conv_param
.
input_spatial_lengths_
[
0
],
conv_param
.
input_spatial_lengths_
[
1
],
conv_param
.
input_spatial_lengths_
[
2
]},
{
conv_param
.
C_
,
// g
conv_param
.
input_spatial_lengths_
[
0
]
*
conv_param
.
input_spatial_lengths_
[
1
]
*
conv_param
.
input_spatial_lengths_
[
2
]
*
conv_param
.
G_
*
conv_param
.
C_
,
// n
1
,
// c
conv_param
.
input_spatial_lengths_
[
1
]
*
conv_param
.
input_spatial_lengths_
[
2
]
*
conv_param
.
G_
*
conv_param
.
C_
,
// di
conv_param
.
input_spatial_lengths_
[
2
]
*
conv_param
.
G_
*
conv_param
.
C_
,
// hi
conv_param
.
G_
*
conv_param
.
C_
// wi
});
const
auto
wei_g_k_c_xs_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
K_
,
conv_param
.
C_
,
conv_param
.
filter_spatial_lengths_
[
0
],
conv_param
.
filter_spatial_lengths_
[
1
],
conv_param
.
filter_spatial_lengths_
[
2
]},
{
conv_param
.
K_
*
conv_param
.
filter_spatial_lengths_
[
0
]
*
conv_param
.
filter_spatial_lengths_
[
1
]
*
conv_param
.
filter_spatial_lengths_
[
2
]
*
conv_param
.
C_
,
// g
conv_param
.
filter_spatial_lengths_
[
0
]
*
conv_param
.
filter_spatial_lengths_
[
1
]
*
conv_param
.
filter_spatial_lengths_
[
2
]
*
conv_param
.
C_
,
// k
1
,
// c
conv_param
.
filter_spatial_lengths_
[
1
]
*
conv_param
.
filter_spatial_lengths_
[
2
]
*
conv_param
.
C_
,
// z
conv_param
.
filter_spatial_lengths_
[
2
]
*
conv_param
.
C_
,
// y
conv_param
.
C_
// x
});
const
auto
bias_g_n_k_wos_desc
=
HostTensorDescriptor
({
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
],
conv_param
.
output_spatial_lengths_
[
1
],
conv_param
.
output_spatial_lengths_
[
2
]},
{
conv_param
.
K_
,
// g
0
,
// n
1
,
// k
0
,
// z
0
,
// y
0
// x
});
const
auto
residual_g_n_k_wos_desc
=
HostTensorDescriptor
({
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
],
conv_param
.
output_spatial_lengths_
[
1
],
conv_param
.
output_spatial_lengths_
[
2
]},
{
conv_param
.
K_
,
// g
0
,
// n
1
,
// k
0
,
// z
0
,
// y
0
// x
});
const
auto
out_g_n_k_wos_desc
=
HostTensorDescriptor
(
{
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
output_spatial_lengths_
[
0
],
conv_param
.
output_spatial_lengths_
[
1
],
conv_param
.
output_spatial_lengths_
[
2
]},
{
conv_param
.
K_
,
// g
conv_param
.
output_spatial_lengths_
[
0
]
*
conv_param
.
output_spatial_lengths_
[
1
]
*
conv_param
.
output_spatial_lengths_
[
2
]
*
conv_param
.
G_
*
conv_param
.
K_
,
// n
1
,
// k
conv_param
.
output_spatial_lengths_
[
1
]
*
conv_param
.
output_spatial_lengths_
[
2
]
*
conv_param
.
G_
*
conv_param
.
K_
,
// do
conv_param
.
output_spatial_lengths_
[
2
]
*
conv_param
.
G_
*
conv_param
.
K_
,
// ho
conv_param
.
G_
*
conv_param
.
K_
// wo
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InKernelDataType
,
WeiKernelDataType
,
CShuffleDataType
,
OutKernelDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
WeiLayout
,
BiasLayout
,
ResidualLayout
,
OutLayout
>>
(
do_verification
,
init_method
,
time_kernel
,
conv_param
,
in_g_n_c_wis_desc
,
wei_g_k_c_xs_desc
,
bias_g_n_k_wos_desc
,
residual_g_n_k_wos_desc
,
out_g_n_k_wos_desc
,
in_element_op
,
wei_element_op
,
out_element_op
);
}
return
0
;
}
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
int8_t
;
// kernel data types
using
WeiDataType
=
int8_t
;
using
InKernelDataType
=
int8_t
;
using
AccDataType
=
int32_t
;
using
WeiKernelDataType
=
int8_t
;
using
CShuffleDataType
=
int8_t
;
using
AccDataType
=
int32_t
;
using
BiasDataType
=
int8_t
;
using
CShuffleDataType
=
int8_t
;
using
ResidualDataType
=
int8_t
;
using
BiasKernelDataType
=
int8_t
;
using
OutDataType
=
int8_t
;
using
ResidualKernelDataType
=
int8_t
;
using
OutKernelDataType
=
int8_t
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
View file @
451aef90
...
@@ -98,6 +98,16 @@ struct AddReluAdd
...
@@ -98,6 +98,16 @@ struct AddReluAdd
int32_t
c
=
b
+
x2
;
int32_t
c
=
b
+
x2
;
y
=
c
;
y
=
c
;
}
}
template
<
>
__host__
__device__
constexpr
void
operator
()
<
int4_t
,
int8_t
,
int4_t
,
int4_t
>
(
int4_t
&
y
,
const
int8_t
&
x0
,
const
int4_t
&
x1
,
const
int4_t
&
x2
)
const
{
int32_t
a
=
x0
+
x1
;
int32_t
b
=
a
>
0
?
a
:
0
;
int32_t
c
=
b
+
x2
;
y
=
c
;
}
};
};
struct
AddHardswishAdd
struct
AddHardswishAdd
...
...
library/include/ck/library/utility/check_err.hpp
View file @
451aef90
...
@@ -150,7 +150,12 @@ check_err(const std::vector<T>& out,
...
@@ -150,7 +150,12 @@ check_err(const std::vector<T>& out,
}
}
template
<
typename
T
>
template
<
typename
T
>
typename
std
::
enable_if
<
std
::
is_integral
<
T
>::
value
&&
!
std
::
is_same
<
T
,
bhalf_t
>::
value
,
bool
>::
type
std
::
enable_if_t
<
(
std
::
is_integral_v
<
T
>
&&
!
std
::
is_same_v
<
T
,
bhalf_t
>
)
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
||
std
::
is_same_v
<
T
,
int4_t
>
#endif
,
bool
>
check_err
(
const
std
::
vector
<
T
>&
out
,
check_err
(
const
std
::
vector
<
T
>&
out
,
const
std
::
vector
<
T
>&
ref
,
const
std
::
vector
<
T
>&
ref
,
const
std
::
string
&
msg
=
"Error: Incorrect results!"
,
const
std
::
string
&
msg
=
"Error: Incorrect results!"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment