Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
e70a4d19
Commit
e70a4d19
authored
Dec 13, 2023
by
Jun Liu
Browse files
Merge branch 'amd-develop' into amd-master
parents
ce72f286
0dacd895
Changes
472
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
165 additions
and
89 deletions
+165
-89
client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
...tization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
+1
-1
client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp
...antization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp
+12
-12
client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp
...le/09_quantization/conv2d_fwd_perchannel_quantization.cpp
+13
-13
client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp
...mple/09_quantization/conv2d_fwd_perlayer_quantization.cpp
+12
-12
client_example/10_grouped_convnd_bwd_data/CMakeLists.txt
client_example/10_grouped_convnd_bwd_data/CMakeLists.txt
+3
-3
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
+6
-4
client_example/11_grouped_conv_bwd_weight/common.hpp
client_example/11_grouped_conv_bwd_weight/common.hpp
+6
-2
client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp
...wd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp
+70
-0
client_example/12_elementwise_normalization/CMakeLists.txt
client_example/12_elementwise_normalization/CMakeLists.txt
+1
-1
client_example/13_batchnorm/CMakeLists.txt
client_example/13_batchnorm/CMakeLists.txt
+3
-3
client_example/14_instance_id/CMakeLists.txt
client_example/14_instance_id/CMakeLists.txt
+1
-1
client_example/15_convnd_bwd_data/CMakeLists.txt
client_example/15_convnd_bwd_data/CMakeLists.txt
+2
-2
client_example/15_gemm_add_multiply/CMakeLists.txt
client_example/15_gemm_add_multiply/CMakeLists.txt
+1
-1
client_example/15_reduce/CMakeLists.txt
client_example/15_reduce/CMakeLists.txt
+1
-1
client_example/16_convnd_fwd/CMakeLists.txt
client_example/16_convnd_fwd/CMakeLists.txt
+3
-3
client_example/16_convnd_fwd/common.hpp
client_example/16_convnd_fwd/common.hpp
+14
-14
client_example/17_grouped_gemm_fastgelu/CMakeLists.txt
client_example/17_grouped_gemm_fastgelu/CMakeLists.txt
+1
-1
client_example/18_groupnorm/CMakeLists.txt
client_example/18_groupnorm/CMakeLists.txt
+1
-1
client_example/18_groupnorm/groupnorm_swish.cpp
client_example/18_groupnorm/groupnorm_swish.cpp
+10
-10
client_example/19_pool/CMakeLists.txt
client_example/19_pool/CMakeLists.txt
+4
-4
No files found.
client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp
View file @
e70a4d19
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
...
@@ -83,7 +83,7 @@ int main(int argc, char* argv[])
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
...
...
client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp
View file @
e70a4d19
...
@@ -79,18 +79,18 @@ int main(int argc, char* argv[])
...
@@ -79,18 +79,18 @@ int main(int argc, char* argv[])
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
>
,
ck
::
Tuple
<
BiasLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<
BiasDataType
>
,
ck
::
Tuple
<
BiasDataType
>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp
View file @
e70a4d19
...
@@ -76,19 +76,19 @@ int main(int argc, char* argv[])
...
@@ -76,19 +76,19 @@ int main(int argc, char* argv[])
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
requant_scale
(
sizeof
(
RequantScaleDataType
)
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleABD
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<
RequantScaleLayout
>
,
ck
::
Tuple
<
RequantScaleLayout
>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<
RequantScaleDataType
>
,
ck
::
Tuple
<
RequantScaleDataType
>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp
View file @
e70a4d19
...
@@ -72,18 +72,18 @@ int main(int argc, char* argv[])
...
@@ -72,18 +72,18 @@ int main(int argc, char* argv[])
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
OutElementOp
>
;
OutElementOp
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/10_grouped_convnd_bwd_data/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_grouped_conv2d_bwd_data grouped_conv2d_bwd_data.cpp
)
add_executable
(
client_grouped_conv2d_bwd_data grouped_conv2d_bwd_data.cpp
)
target_link_libraries
(
client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_
conv_
operations
)
add_executable
(
client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp
)
add_executable
(
client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp
)
target_link_libraries
(
client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_
conv_
operations
)
add_executable
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp
)
add_executable
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp
)
target_link_libraries
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_
conv_
operations
)
client_example/11_grouped_conv_bwd_weight/CMakeLists.txt
View file @
e70a4d19
...
@@ -2,8 +2,10 @@ add_executable(client_grouped_conv1d_bwd_weight_fp16 grouped_conv1d_bwd_weight_f
...
@@ -2,8 +2,10 @@ add_executable(client_grouped_conv1d_bwd_weight_fp16 grouped_conv1d_bwd_weight_f
add_executable
(
client_grouped_conv2d_bwd_weight_fp16 grouped_conv2d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv2d_bwd_weight_fp16 grouped_conv2d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_fp16.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp
)
add_executable
(
client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp
)
target_link_libraries
(
client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations
)
target_link_libraries
(
client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations
)
client_example/11_grouped_conv_bwd_weight/common.hpp
View file @
e70a4d19
...
@@ -85,7 +85,9 @@ template <ck::index_t NumDimSpatial,
...
@@ -85,7 +85,9 @@ template <ck::index_t NumDimSpatial,
typename
OutDataType
,
typename
OutDataType
,
typename
InLayout
,
typename
InLayout
,
typename
WeiLayout
,
typename
WeiLayout
,
typename
OutLayout
>
typename
OutLayout
,
typename
AComputeType
=
InDataType
,
typename
BComputeType
=
AComputeType
>
bool
run_grouped_conv_bwd_weight
(
bool
run_grouped_conv_bwd_weight
(
const
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>&
input_lengths
,
const
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>&
input_lengths
,
const
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>&
input_strides
,
const
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>&
input_strides
,
...
@@ -113,7 +115,9 @@ bool run_grouped_conv_bwd_weight(
...
@@ -113,7 +115,9 @@ bool run_grouped_conv_bwd_weight(
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
>
;
PassThrough
,
AComputeType
,
BComputeType
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp
0 → 100644
View file @
e70a4d19
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
// Element types of the input, weight and output tensors.
using InDataType  = ck::half_t;
using WeiDataType = ck::half_t;
using OutDataType = ck::half_t;

// Layout tags of the input, weight and output tensors.
using InLayout  = ck::tensor_layout::convolution::NDHWGC;
using WeiLayout = ck::tensor_layout::convolution::GKZYXC;
using OutLayout = ck::tensor_layout::convolution::NDHWGK;

// Types passed as the AComputeType / BComputeType template arguments of
// run_grouped_conv_bwd_weight in main().
using AComputeType = ck::bf8_t;
using BComputeType = ck::f8_t;

// Number of spatial dimensions of the convolution.
static constexpr ck::index_t NumDimSpatial{3};

// Scalar problem sizes used to build the length and stride arrays below.
static constexpr ck::index_t G{8};
static constexpr ck::index_t N{64};
static constexpr ck::index_t K{128};
static constexpr ck::index_t C{128};
static constexpr ck::index_t Z{3};
static constexpr ck::index_t Y{3};
static constexpr ck::index_t X{3};
static constexpr ck::index_t Di{28};
static constexpr ck::index_t Hi{28};
static constexpr ck::index_t Wi{3};
static constexpr ck::index_t Do{28};
static constexpr ck::index_t Ho{28};
static constexpr ck::index_t Wo{3};

// Tensor extents; each array has NumDimSpatial + 3 entries.
static constexpr std::array<ck::index_t, NumDimSpatial + 3> input_lengths  = {G, N, C, Di, Hi, Wi};
static constexpr std::array<ck::index_t, NumDimSpatial + 3> filter_lengths = {G, K, C, Z, Y, X};
static constexpr std::array<ck::index_t, NumDimSpatial + 3> output_lengths = {G, N, K, Do, Ho, Wo};

// Tensor strides, listed in the same entry order as the matching length arrays.
// NOTE(review): the first entry of each array is the largest stride, whereas the
// layout tags above are NDHWGC / GKZYXC / NDHWGK -- confirm these values are what
// the device operation expects for those layouts.
static constexpr std::array<ck::index_t, NumDimSpatial + 3> input_strides = {
    N * Di * Hi * Wi * C, Di * Hi * Wi * C, 1, Hi * Wi * C, Wi * C, C};
static constexpr std::array<ck::index_t, NumDimSpatial + 3> weights_strides = {
    K * Z * Y * X * C, Z * Y * X * C, 1, Y * X * C, X * C, C};
static constexpr std::array<ck::index_t, NumDimSpatial + 3> output_strides = {
    N * Do * Ho * Wo * K, Do * Ho * Wo * K, 1, Ho * Wo * K, Wo * K, K};

// Per-spatial-dimension convolution parameters (one entry per spatial dimension).
static constexpr std::array<ck::index_t, NumDimSpatial> conv_filter_strides   = {1, 1, 1};
static constexpr std::array<ck::index_t, NumDimSpatial> conv_filter_dilations = {1, 1, 1};
static constexpr std::array<ck::index_t, NumDimSpatial> input_left_pads       = {1, 1, 1};
static constexpr std::array<ck::index_t, NumDimSpatial> input_right_pads      = {1, 1, 1};
/// Entry point of the client example.
///
/// Calls run_grouped_conv_bwd_weight with the element types, layout tags,
/// compute types and problem description declared at file scope, and maps its
/// boolean result onto the process exit status.
///
/// @return EXIT_SUCCESS when run_grouped_conv_bwd_weight returns true,
///         EXIT_FAILURE otherwise.
int main()
{
    const bool succeeded = run_grouped_conv_bwd_weight<NumDimSpatial,
                                                       InDataType,
                                                       WeiDataType,
                                                       OutDataType,
                                                       InLayout,
                                                       WeiLayout,
                                                       OutLayout,
                                                       AComputeType,
                                                       BComputeType>(input_lengths,
                                                                     input_strides,
                                                                     filter_lengths,
                                                                     weights_strides,
                                                                     output_lengths,
                                                                     output_strides,
                                                                     conv_filter_strides,
                                                                     conv_filter_dilations,
                                                                     input_left_pads,
                                                                     input_right_pads);

    if(!succeeded)
    {
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
client_example/12_elementwise_normalization/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_elementwise_layernorm2d elementwise_layernorm2d.cpp
)
add_executable
(
client_elementwise_layernorm2d elementwise_layernorm2d.cpp
)
target_link_libraries
(
client_elementwise_layernorm2d PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_elementwise_layernorm2d PRIVATE composable_kernel::device_
other_
operations
)
client_example/13_batchnorm/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_batchnorm_fwd_nhwc batchnorm_fwd_nhwc.cpp
)
add_executable
(
client_batchnorm_fwd_nhwc batchnorm_fwd_nhwc.cpp
)
add_executable
(
client_batchnorm_bwd_nhwc batchnorm_bwd_nhwc.cpp
)
add_executable
(
client_batchnorm_bwd_nhwc batchnorm_bwd_nhwc.cpp
)
add_executable
(
client_batchnorm_infer_nhwc batchnorm_infer_nhwc.cpp
)
add_executable
(
client_batchnorm_infer_nhwc batchnorm_infer_nhwc.cpp
)
target_link_libraries
(
client_batchnorm_fwd_nhwc PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_batchnorm_fwd_nhwc PRIVATE composable_kernel::device_
other_
operations
)
target_link_libraries
(
client_batchnorm_bwd_nhwc PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_batchnorm_bwd_nhwc PRIVATE composable_kernel::device_
other_
operations
)
target_link_libraries
(
client_batchnorm_infer_nhwc PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_batchnorm_infer_nhwc PRIVATE composable_kernel::device_
other_
operations
)
client_example/14_instance_id/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_batchnorm_fwd_instance_id batchnorm_fwd_instance_id.cpp
)
add_executable
(
client_batchnorm_fwd_instance_id batchnorm_fwd_instance_id.cpp
)
target_link_libraries
(
client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_
other_
operations
)
client_example/15_convnd_bwd_data/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_conv3d_bwd_data_fp16 conv3d_bwd_data_fp16.cpp
)
add_executable
(
client_conv3d_bwd_data_fp16 conv3d_bwd_data_fp16.cpp
)
add_executable
(
client_conv3d_bwd_data_fp32 conv3d_bwd_data_fp32.cpp
)
add_executable
(
client_conv3d_bwd_data_fp32 conv3d_bwd_data_fp32.cpp
)
target_link_libraries
(
client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_
conv_
operations
)
target_link_libraries
(
client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_
conv_
operations
)
client_example/15_gemm_add_multiply/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_gemm_add_multiply gemm_add_multiply.cpp
)
add_executable
(
client_gemm_add_multiply gemm_add_multiply.cpp
)
target_link_libraries
(
client_gemm_add_multiply PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_gemm_add_multiply PRIVATE composable_kernel::device_gemm_operations
)
\ No newline at end of file
\ No newline at end of file
client_example/15_reduce/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_reduce_nhwc_c reduce_nhwc_c.cpp
)
add_executable
(
client_reduce_nhwc_c reduce_nhwc_c.cpp
)
target_link_libraries
(
client_reduce_nhwc_c PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_reduce_nhwc_c PRIVATE composable_kernel::device_
reduction_
operations
)
client_example/16_convnd_fwd/CMakeLists.txt
View file @
e70a4d19
if
((
DTYPES MATCHES
"fp16"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp16"
)
OR NOT DEFINED DTYPES
)
add_executable
(
client_conv3d_fwd_fp16 conv3d_fwd_fp16.cpp
)
add_executable
(
client_conv3d_fwd_fp16 conv3d_fwd_fp16.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_
conv_
operations
)
endif
()
endif
()
if
((
DTYPES MATCHES
"fp8"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp8"
)
OR NOT DEFINED DTYPES
)
add_executable
(
client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp
)
add_executable
(
client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_
conv_
operations
)
endif
()
endif
()
if
((
DTYPES MATCHES
"fp32"
)
OR NOT DEFINED DTYPES
)
if
((
DTYPES MATCHES
"fp32"
)
OR NOT DEFINED DTYPES
)
add_executable
(
client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp
)
add_executable
(
client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_
conv_
operations
)
endif
()
endif
()
client_example/16_convnd_fwd/common.hpp
View file @
e70a4d19
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_
ab
d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
...
@@ -174,19 +174,19 @@ bool run_grouped_conv_fwd(std::array<ck::index_t, NumDimSpatial + NumNonSpatialD
...
@@ -174,19 +174,19 @@ bool run_grouped_conv_fwd(std::array<ck::index_t, NumDimSpatial + NumNonSpatialD
std
::
size_t
flop
=
GetFlops
<
NumDimSpatial
>
(
out_lengths
,
wei_lengths
);
std
::
size_t
flop
=
GetFlops
<
NumDimSpatial
>
(
out_lengths
,
wei_lengths
);
std
::
size_t
num_bytes
=
in_mem_size
+
wei_mem_size
+
out_mem_size
;
std
::
size_t
num_bytes
=
in_mem_size
+
wei_mem_size
+
out_mem_size
;
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultiple
AB
D
<
NumDimSpatial
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutLayout
,
OutLayout
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
OutDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
PassThrough
,
ComputeType
>
;
ComputeType
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
DeviceOp
>::
GetInstances
();
...
...
client_example/17_grouped_gemm_fastgelu/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_grouped_gemm_fastgelu grouped_gemm_fastgelu.cpp
)
add_executable
(
client_grouped_gemm_fastgelu grouped_gemm_fastgelu.cpp
)
target_link_libraries
(
client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations
)
\ No newline at end of file
\ No newline at end of file
client_example/18_groupnorm/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_groupnorm_swish groupnorm_swish.cpp
)
add_executable
(
client_groupnorm_swish groupnorm_swish.cpp
)
target_link_libraries
(
client_groupnorm_swish PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_groupnorm_swish PRIVATE composable_kernel::device_
other_
operations
)
client_example/18_groupnorm/groupnorm_swish.cpp
View file @
e70a4d19
...
@@ -7,10 +7,10 @@
...
@@ -7,10 +7,10 @@
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_normalization.hpp"
#include "ck/tensor_operation/gpu/device/device_normalization
_fwd
.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization_swish.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization_
fwd_
swish.hpp"
using
XDataType
=
ck
::
half_t
;
using
XDataType
=
ck
::
half_t
;
using
GammaDataType
=
float
;
using
GammaDataType
=
float
;
...
@@ -64,14 +64,14 @@ int main(int argc, char* argv[])
...
@@ -64,14 +64,14 @@ int main(int argc, char* argv[])
SimpleDeviceMem
save_inv_std_device_buf
(
sizeof
(
SaveMeanInvStdDataType
)
*
N
*
G
);
SimpleDeviceMem
save_inv_std_device_buf
(
sizeof
(
SaveMeanInvStdDataType
)
*
N
*
G
);
#endif
#endif
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceNormalization
<
XDataType
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceNormalization
Fwd
<
XDataType
,
GammaDataType
,
GammaDataType
,
BetaDataType
,
BetaDataType
,
YDataType
,
YDataType
,
SaveMeanInvStdDataType
,
SaveMeanInvStdDataType
,
Swish
,
Swish
,
Rank
,
Rank
,
NumReduceDim
>
;
NumReduceDim
>
;
// get device op instances
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
...
...
client_example/19_pool/CMakeLists.txt
View file @
e70a4d19
add_executable
(
client_max_pool2d_fwd max_pool2d_fwd.cpp
)
add_executable
(
client_max_pool2d_fwd max_pool2d_fwd.cpp
)
target_link_libraries
(
client_max_pool2d_fwd PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_max_pool2d_fwd PRIVATE composable_kernel::device_
other_
operations
)
add_executable
(
client_max_pool2d_bwd max_pool2d_bwd.cpp
)
add_executable
(
client_max_pool2d_bwd max_pool2d_bwd.cpp
)
target_link_libraries
(
client_max_pool2d_bwd PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_max_pool2d_bwd PRIVATE composable_kernel::device_
other_
operations
)
add_executable
(
client_avg_pool3d_fwd avg_pool3d_fwd.cpp
)
add_executable
(
client_avg_pool3d_fwd avg_pool3d_fwd.cpp
)
target_link_libraries
(
client_avg_pool3d_fwd PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_avg_pool3d_fwd PRIVATE composable_kernel::device_
other_
operations
)
add_executable
(
client_avg_pool3d_bwd avg_pool3d_bwd.cpp
)
add_executable
(
client_avg_pool3d_bwd avg_pool3d_bwd.cpp
)
target_link_libraries
(
client_avg_pool3d_bwd PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_avg_pool3d_bwd PRIVATE composable_kernel::device_
other_
operations
)
Prev
1
2
3
4
5
6
…
24
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment