Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7a4f83e0
Commit
7a4f83e0
authored
Nov 08, 2023
by
Bartlomiej Kocot
Browse files
Replace multiD with multiABD
parent
95479e67
Changes
115
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
116 additions
and
116 deletions
+116
-116
client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp
client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp
+12
-12
client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp
client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp
+12
-12
client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp
...tization/conv2d_fwd_bias_relu_perchannel_quantization.cpp
+1
-1
client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp
...antization/conv2d_fwd_bias_relu_perlayer_quantization.cpp
+12
-12
client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
...tization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
+1
-1
client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp
...antization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp
+12
-12
client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp
...le/09_quantization/conv2d_fwd_perchannel_quantization.cpp
+13
-13
client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp
...mple/09_quantization/conv2d_fwd_perlayer_quantization.cpp
+12
-12
client_example/16_convnd_fwd/common.hpp
client_example/16_convnd_fwd/common.hpp
+14
-14
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc
...scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc
+1
-1
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc
...d_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc
+12
-12
example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp
example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp
+2
-2
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
+2
-2
example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp
example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp
+2
-2
example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp
example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp
+2
-2
example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp
example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp
+2
-2
example/30_grouped_conv_fwd_multiple_d/README.md
example/30_grouped_conv_fwd_multiple_d/README.md
+1
-1
example/30_grouped_conv_fwd_multiple_d/common.hpp
example/30_grouped_conv_fwd_multiple_d/common.hpp
+1
-1
example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc
...multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc
+1
-1
example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc
...uped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc
+1
-1
No files found.
client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp
View file @
7a4f83e0
...
@@ -100,18 +100,18 @@ int main()
...
@@ -100,18 +100,18 @@ int main()
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
X
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
G
*
N
*
Wo
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
G
*
N
*
Wo
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
>
;
PassThrough
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
...
...
client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp
View file @
7a4f83e0
...
@@ -71,18 +71,18 @@ int main()
...
@@ -71,18 +71,18 @@ int main()
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
>
;
PassThrough
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
...
...
client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp
View file @
7a4f83e0
...
@@ -80,7 +80,7 @@ int main(int argc, char* argv[])
...
@@ -80,7 +80,7 @@ int main(int argc, char* argv[])
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp
View file @
7a4f83e0
...
@@ -78,18 +78,18 @@ int main(int argc, char* argv[])
...
@@ -78,18 +78,18 @@ int main(int argc, char* argv[])
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
>
,
ck
::
Tuple
<
BiasLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<
BiasDataType
>
,
ck
::
Tuple
<
BiasDataType
>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
View file @
7a4f83e0
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp
View file @
7a4f83e0
...
@@ -79,18 +79,18 @@ int main(int argc, char* argv[])
...
@@ -79,18 +79,18 @@ int main(int argc, char* argv[])
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
>
,
ck
::
Tuple
<
BiasLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<
BiasDataType
>
,
ck
::
Tuple
<
BiasDataType
>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp
View file @
7a4f83e0
...
@@ -76,19 +76,19 @@ int main(int argc, char* argv[])
...
@@ -76,19 +76,19 @@ int main(int argc, char* argv[])
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleABD
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
RequantScaleLayout
>
,
ck
::
Tuple
<
RequantScaleLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<
RequantScaleDataType
>
,
ck
::
Tuple
<
RequantScaleDataType
>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp
View file @
7a4f83e0
...
@@ -72,18 +72,18 @@ int main(int argc, char* argv[])
...
@@ -72,18 +72,18 @@ int main(int argc, char* argv[])
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/16_convnd_fwd/common.hpp
View file @
7a4f83e0
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_
ab
d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
...
@@ -174,19 +174,19 @@ bool run_grouped_conv_fwd(std::array<ck::index_t, NumDimSpatial + NumNonSpatialD
...
@@ -174,19 +174,19 @@ bool run_grouped_conv_fwd(std::array<ck::index_t, NumDimSpatial + NumNonSpatialD
std
::
size_t
flop
=
GetFlops
<
NumDimSpatial
>
(
out_lengths
,
wei_lengths
);
std
::
size_t
flop
=
GetFlops
<
NumDimSpatial
>
(
out_lengths
,
wei_lengths
);
std
::
size_t
num_bytes
=
in_mem_size
+
wei_mem_size
+
out_mem_size
;
std
::
size_t
num_bytes
=
in_mem_size
+
wei_mem_size
+
out_mem_size
;
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
ComputeType
>
;
ComputeType
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc
View file @
7a4f83e0
...
@@ -76,7 +76,7 @@ int execute_conv_fwd_scaleadd_scaleadd_relu()
...
@@ -76,7 +76,7 @@ int execute_conv_fwd_scaleadd_scaleadd_relu()
SimpleDeviceMem
d0
(
sizeof
(
std
::
tuple_element_t
<
0
,
DDataTypes
>
)
*
N
*
Do
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
d0
(
sizeof
(
std
::
tuple_element_t
<
0
,
DDataTypes
>
)
*
N
*
Do
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
d1
(
sizeof
(
std
::
tuple_element_t
<
1
,
DDataTypes
>
)
*
N
*
Do
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
d1
(
sizeof
(
std
::
tuple_element_t
<
1
,
DDataTypes
>
)
*
N
*
Do
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc
View file @
7a4f83e0
...
@@ -86,18 +86,18 @@ int execute_conv_fwd_scaleadd_ab()
...
@@ -86,18 +86,18 @@ int execute_conv_fwd_scaleadd_ab()
SimpleDeviceMem
wei_bias
(
sizeof
(
WeightBiasDtype
)
*
G
*
K
*
Z
*
Y
*
X
*
C
);
SimpleDeviceMem
wei_bias
(
sizeof
(
WeightBiasDtype
)
*
G
*
K
*
Z
*
Y
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Do
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Do
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
ScaleAdd
,
ScaleAdd
,
ScaleAdd
,
ScaleAdd
,
PassThrough
>
;
PassThrough
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
...
...
example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp
View file @
7a4f83e0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "convnd_fwd_common.hpp"
#include "convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_
ab
d_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
using
DeviceGroupedConvNDFwdInstance
=
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
View file @
7a4f83e0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "convnd_fwd_common.hpp"
#include "convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_
ab
d_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
using
DeviceGroupedConvNDFwdInstance
=
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp
View file @
7a4f83e0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "convnd_fwd_common.hpp"
#include "convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_
ab
d_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
using
DeviceGroupedConvNDFwdInstance
=
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp
View file @
7a4f83e0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "convnd_fwd_common.hpp"
#include "convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_
ab
d_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
using
DeviceGroupedConvNDFwdInstance
=
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp
View file @
7a4f83e0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "convnd_fwd_common.hpp"
#include "convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_
ab
d_xdl_cshuffle.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
...
@@ -27,7 +27,7 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
>
using
DeviceGroupedConvNDFwdInstance
=
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
example/30_grouped_conv_fwd_multiple_d/README.md
View file @
7a4f83e0
...
@@ -26,5 +26,5 @@ out: dim 5, lengths {1, 128, 256, 36, 36}, strides {256, 331776, 1, 9216, 256}
...
@@ -26,5 +26,5 @@ out: dim 5, lengths {1, 128, 256, 36, 36}, strides {256, 331776, 1, 9216, 256}
launch_and_time_kernel: grid_dim {1296, 1, 1}, block_dim {256, 1, 1}
launch_and_time_kernel: grid_dim {1296, 1, 1}, block_dim {256, 1, 1}
Warm up 1 time
Warm up 1 time
Start running 10 times...
Start running 10 times...
Perf: 1.55981 ms, 94.0927 TFlops, 213.868 GB/s, DeviceGroupedConvFwdMultipleD_Xdl_CShuffle<256, 128, 256, 16, Default>
Perf: 1.55981 ms, 94.0927 TFlops, 213.868 GB/s, DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle<256, 128, 256, 16, Default>
```
```
example/30_grouped_conv_fwd_multiple_d/common.hpp
View file @
7a4f83e0
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_
ab
d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
...
...
example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc
View file @
7a4f83e0
...
@@ -34,7 +34,7 @@ using ResidualLayout = typename LayoutSettingSelector<NDimSpatial>::ResidualLayo
...
@@ -34,7 +34,7 @@ using ResidualLayout = typename LayoutSettingSelector<NDimSpatial>::ResidualLayo
template
<
ck
::
index_t
NDimSpatial
>
template
<
ck
::
index_t
NDimSpatial
>
using
DeviceConvFwdInstance
=
using
DeviceConvFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
...
...
example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc
View file @
7a4f83e0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
template
<
ck
::
index_t
NDimSpatial
>
template
<
ck
::
index_t
NDimSpatial
>
using
DeviceConvFwdInstance
=
using
DeviceConvFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D_Xdl_CShuffle
<
NDimSpatial
,
NDimSpatial
,
InputLayout
<
NDimSpatial
>
,
InputLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
WeightLayout
<
NDimSpatial
>
,
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment