Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
451aef90
Commit
451aef90
authored
Aug 22, 2022
by
Rosty Geyyer
Browse files
Add int4 example for convnd_fwd_bias_relu_add
parent
9efd033b
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
647 additions
and
105 deletions
+647
-105
example/30_grouped_convnd_fwd_bias_relu_add/CMakeLists.txt
example/30_grouped_convnd_fwd_bias_relu_add/CMakeLists.txt
+6
-1
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
...bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
+26
-23
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp
+35
-20
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp
+35
-20
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp
+35
-20
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
+459
-0
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
...as_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
+35
-20
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
...k/tensor_operation/gpu/element/element_wise_operation.hpp
+10
-0
library/include/ck/library/utility/check_err.hpp
library/include/ck/library/utility/check_err.hpp
+6
-1
No files found.
example/30_grouped_convnd_fwd_bias_relu_add/CMakeLists.txt
View file @
451aef90
...
@@ -8,4 +8,9 @@ add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 grouped
...
@@ -8,4 +8,9 @@ add_example_executable(example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 grouped
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 PRIVATE utility
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_bf16 PRIVATE utility
)
add_example_executable
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
)
add_example_executable
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 PRIVATE utility
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int8 PRIVATE utility
)
\ No newline at end of file
if
(
USE_BITINT_EXTENSION_INT4
)
add_example_executable
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int4 grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
)
target_link_libraries
(
example_grouped_convnd_fwd_bias_relu_add_xdl_int4 PRIVATE utility
)
endif
()
# USE_BITINT_EXTENSION_INT4
\ No newline at end of file
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
View file @
451aef90
...
@@ -26,13 +26,16 @@ void print_helper_msg()
...
@@ -26,13 +26,16 @@ void print_helper_msg()
}
}
template
<
ck
::
index_t
NDimSpatial
,
template
<
ck
::
index_t
NDimSpatial
,
typename
InDataType
,
typename
In
Kernel
DataType
,
typename
WeiDataType
,
typename
Wei
Kernel
DataType
,
typename
CShuffleDataType
,
typename
CShuffleDataType
,
typename
OutDataType
,
typename
Out
Kernel
DataType
,
typename
InElementOp
,
typename
InElementOp
,
typename
WeiElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
OutElementOp
,
typename
InUserDataType
,
typename
WeiUserDataType
,
typename
OutUserDataType
,
typename
DeviceConvNDFwdInstance
>
typename
DeviceConvNDFwdInstance
>
int
run_grouped_conv_fwd_bias_relu_add
(
bool
do_verification
,
int
run_grouped_conv_fwd_bias_relu_add
(
bool
do_verification
,
int
init_method
,
int
init_method
,
...
@@ -47,12 +50,12 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -47,12 +50,12 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
const
WeiElementOp
&
wei_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
const
OutElementOp
&
out_element_op
)
{
{
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
In
User
DataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
Wei
User
DataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
OutDataType
>
bias
(
bias_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
bias
(
bias_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
residual
(
residual_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
residual
(
residual_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
out_host
(
out_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
out_host
(
out_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
out_device
(
out_g_n_k_wos_desc
);
Tensor
<
Out
User
DataType
>
out_device
(
out_g_n_k_wos_desc
);
std
::
cout
<<
"in: "
<<
in
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"in: "
<<
in
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
...
@@ -64,21 +67,21 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -64,21 +67,21 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
{
{
case
0
:
break
;
case
0
:
break
;
case
1
:
case
1
:
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
5
,
5
});
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
In
User
DataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
Wei
User
DataType
>
{
-
5
,
5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_2
<
Out
User
DataType
>
{
-
5
,
5
});
break
;
break
;
default:
default:
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
InDataType
>
{
0.0
,
1.0
});
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
In
User
DataType
>
{
0.0
,
1.0
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
Wei
User
DataType
>
{
-
0.5
,
0.5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
-
0.5
,
0.5
});
bias
.
GenerateTensorValue
(
GeneratorTensor_3
<
Out
User
DataType
>
{
-
0.5
,
0.5
});
}
}
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
in_device_buf
(
sizeof
(
In
User
DataType
)
*
in
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
Wei
User
DataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
bias_device_buf
(
sizeof
(
OutDataType
)
*
bias
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
bias_device_buf
(
sizeof
(
Out
User
DataType
)
*
bias
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
residual_device_buf
(
sizeof
(
OutDataType
)
*
residual
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
residual_device_buf
(
sizeof
(
Out
User
DataType
)
*
residual
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out_device
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
out_device_buf
(
sizeof
(
Out
User
DataType
)
*
out_device
.
mDesc
.
GetElementSpaceSize
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
...
@@ -154,7 +157,7 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -154,7 +157,7 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
conv_param
.
GetFlops
();
std
::
size_t
flop
=
conv_param
.
GetFlops
();
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
();
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
In
User
DataType
,
Wei
User
DataType
,
Out
User
DataType
>
();
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
...
@@ -168,8 +171,8 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
...
@@ -168,8 +171,8 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
Tensor
<
CShuffleDataType
>
c_host
(
out_g_n_k_wos_desc
);
Tensor
<
CShuffleDataType
>
c_host
(
out_g_n_k_wos_desc
);
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
InDataType
,
In
User
DataType
,
WeiDataType
,
Wei
User
DataType
,
CShuffleDataType
,
CShuffleDataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_bf16.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
ck
::
bhalf_t
;
// kernel data types
using
WeiDataType
=
ck
::
bhalf_t
;
using
InKernelDataType
=
ck
::
bhalf_t
;
using
AccDataType
=
float
;
using
WeiKernelDataType
=
ck
::
bhalf_t
;
using
CShuffleDataType
=
float
;
using
AccDataType
=
float
;
using
BiasDataType
=
ck
::
bhalf_t
;
using
CShuffleDataType
=
float
;
using
ResidualDataType
=
ck
::
bhalf_t
;
using
BiasKernelDataType
=
ck
::
bhalf_t
;
using
OutDataType
=
ck
::
bhalf_t
;
using
ResidualKernelDataType
=
ck
::
bhalf_t
;
using
OutKernelDataType
=
ck
::
bhalf_t
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp16.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
ck
::
half_t
;
// kernel data types
using
WeiDataType
=
ck
::
half_t
;
using
InKernelDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
WeiKernelDataType
=
ck
::
half_t
;
using
CShuffleDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
BiasDataType
=
ck
::
half_t
;
using
CShuffleDataType
=
ck
::
half_t
;
using
ResidualDataType
=
ck
::
half_t
;
using
BiasKernelDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
using
ResidualKernelDataType
=
ck
::
half_t
;
using
OutKernelDataType
=
ck
::
half_t
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_fp32.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
float
;
// kernel data types
using
WeiDataType
=
float
;
using
InKernelDataType
=
float
;
using
AccDataType
=
float
;
using
WeiKernelDataType
=
float
;
using
CShuffleDataType
=
float
;
using
AccDataType
=
float
;
using
BiasDataType
=
float
;
using
CShuffleDataType
=
float
;
using
ResidualDataType
=
float
;
using
BiasKernelDataType
=
float
;
using
OutDataType
=
float
;
using
ResidualKernelDataType
=
float
;
using
OutKernelDataType
=
float
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int4.cpp
0 → 100644
View file @
451aef90
This diff is collapsed.
Click to expand it.
example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_xdl_int8.cpp
View file @
451aef90
...
@@ -7,13 +7,19 @@
...
@@ -7,13 +7,19 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
using
InDataType
=
int8_t
;
// kernel data types
using
WeiDataType
=
int8_t
;
using
InKernelDataType
=
int8_t
;
using
AccDataType
=
int32_t
;
using
WeiKernelDataType
=
int8_t
;
using
CShuffleDataType
=
int8_t
;
using
AccDataType
=
int32_t
;
using
BiasDataType
=
int8_t
;
using
CShuffleDataType
=
int8_t
;
using
ResidualDataType
=
int8_t
;
using
BiasKernelDataType
=
int8_t
;
using
OutDataType
=
int8_t
;
using
ResidualKernelDataType
=
int8_t
;
using
OutKernelDataType
=
int8_t
;
// tensor data types
using
InUserDataType
=
InKernelDataType
;
using
WeiUserDataType
=
WeiKernelDataType
;
using
OutUserDataType
=
OutKernelDataType
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -40,12 +46,12 @@ using DeviceGroupedConvNDFwdInstance =
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
ck
::
Tuple
<
BiasLayout
,
ResidualLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
AccDataType
,
AccDataType
,
CShuffleDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
,
ResidualDataType
>
,
ck
::
Tuple
<
Bias
Kernel
DataType
,
Residual
Kernel
DataType
>
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
...
@@ -181,13 +187,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
return
run_grouped_conv_fwd_bias_relu_add
<
1
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
1
,
DeviceGroupedConvNDFwdInstance
<
1
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
...
@@ -290,13 +299,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
return
run_grouped_conv_fwd_bias_relu_add
<
2
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
2
,
DeviceGroupedConvNDFwdInstance
<
2
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
...
@@ -413,13 +425,16 @@ int main(int argc, char* argv[])
});
});
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
return
run_grouped_conv_fwd_bias_relu_add
<
3
,
InDataType
,
In
Kernel
DataType
,
WeiDataType
,
Wei
Kernel
DataType
,
CShuffleDataType
,
CShuffleDataType
,
OutDataType
,
Out
Kernel
DataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
InUserDataType
,
WeiUserDataType
,
OutUserDataType
,
DeviceGroupedConvNDFwdInstance
<
3
,
DeviceGroupedConvNDFwdInstance
<
3
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
View file @
451aef90
...
@@ -98,6 +98,16 @@ struct AddReluAdd
...
@@ -98,6 +98,16 @@ struct AddReluAdd
int32_t
c
=
b
+
x2
;
int32_t
c
=
b
+
x2
;
y
=
c
;
y
=
c
;
}
}
template
<
>
__host__
__device__
constexpr
void
operator
()
<
int4_t
,
int8_t
,
int4_t
,
int4_t
>
(
int4_t
&
y
,
const
int8_t
&
x0
,
const
int4_t
&
x1
,
const
int4_t
&
x2
)
const
{
int32_t
a
=
x0
+
x1
;
int32_t
b
=
a
>
0
?
a
:
0
;
int32_t
c
=
b
+
x2
;
y
=
c
;
}
};
};
struct
AddHardswishAdd
struct
AddHardswishAdd
...
...
library/include/ck/library/utility/check_err.hpp
View file @
451aef90
...
@@ -150,7 +150,12 @@ check_err(const std::vector<T>& out,
...
@@ -150,7 +150,12 @@ check_err(const std::vector<T>& out,
}
}
template
<
typename
T
>
template
<
typename
T
>
typename
std
::
enable_if
<
std
::
is_integral
<
T
>::
value
&&
!
std
::
is_same
<
T
,
bhalf_t
>::
value
,
bool
>::
type
std
::
enable_if_t
<
(
std
::
is_integral_v
<
T
>
&&
!
std
::
is_same_v
<
T
,
bhalf_t
>
)
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
||
std
::
is_same_v
<
T
,
int4_t
>
#endif
,
bool
>
check_err
(
const
std
::
vector
<
T
>&
out
,
check_err
(
const
std
::
vector
<
T
>&
out
,
const
std
::
vector
<
T
>&
ref
,
const
std
::
vector
<
T
>&
ref
,
const
std
::
string
&
msg
=
"Error: Incorrect results!"
,
const
std
::
string
&
msg
=
"Error: Incorrect results!"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment