Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
bf98b476
"docs/vscode:/vscode.git/clone" did not exist on "8ba6e3b4c7270b3f34f70ecda70d4739acb11ca0"
Unverified
Commit
bf98b476
authored
Feb 13, 2024
by
Bartłomiej Kocot
Committed by
GitHub
Feb 13, 2024
Browse files
Add bilinear conv fwd and bwd data instances (#1164)
parent
a78be3f6
Changes
63
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
730 additions
and
149 deletions
+730
-149
example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp
example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp
+0
-11
example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp
example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp
+0
-11
example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp
example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp
+0
-11
example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp
example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp
+0
-11
example/62_convnd_activ/CMakeLists.txt
example/62_convnd_activ/CMakeLists.txt
+17
-0
example/62_convnd_activ/binary/CMakeLists.txt
example/62_convnd_activ/binary/CMakeLists.txt
+13
-0
example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp
...tiv/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp
+266
-0
example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp
...nd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp
+266
-0
example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp
...nvnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp
+14
-14
example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
...nvnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
+14
-14
example/62_convnd_activ/multi_AB/CMakeLists.txt
example/62_convnd_activ/multi_AB/CMakeLists.txt
+17
-0
example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp
...2_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp
+9
-9
example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp
...2_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp
+9
-9
example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp
...2_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp
+9
-9
example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp
...2_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp
+9
-9
example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp
...onvnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp
+11
-11
example/62_convnd_activ/run_convnd_activ_example.inc
example/62_convnd_activ/run_convnd_activ_example.inc
+19
-19
example/62_convnd_activ/unary/CMakeLists.txt
example/62_convnd_activ/unary/CMakeLists.txt
+35
-0
example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp
...e/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp
+11
-11
example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp
example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp
+11
-0
No files found.
example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp
deleted
100644 → 0
View file @
a78be3f6
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_common.hpp"
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
Relu
;
using
DeviceGroupedConvNDFwdActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
#include "run_convnd_fwd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_fwd_example
(
argc
,
argv
);
}
example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp
deleted
100644 → 0
View file @
a78be3f6
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_common.hpp"
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
Sigmoid
;
using
DeviceGroupedConvNDFwdActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
#include "run_convnd_fwd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_fwd_example
(
argc
,
argv
);
}
example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp
deleted
100644 → 0
View file @
a78be3f6
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_common.hpp"
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
SoftRelu
;
using
DeviceGroupedConvNDFwdActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
#include "run_convnd_fwd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_fwd_example
(
argc
,
argv
);
}
example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp
deleted
100644 → 0
View file @
a78be3f6
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_common.hpp"
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
TanH
;
using
DeviceGroupedConvNDFwdActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
#include "run_convnd_fwd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_fwd_example
(
argc
,
argv
);
}
example/62_convnd_activ/CMakeLists.txt
0 → 100644
View file @
bf98b476
add_subdirectory
(
binary
)
add_subdirectory
(
multi_AB
)
add_subdirectory
(
unary
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_custom_target
(
example_convnd_activ_xdl
)
# ScaleAdd ScaleAdd Relu
add_example_executable
(
example_convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16 convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
)
add_example_dependencies
(
example_convnd_activ_xdl example_convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16
)
add_example_executable
(
example_convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16 convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp
)
add_example_dependencies
(
example_convnd_activ_xdl example_convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16
)
set
(
target 1
)
endif
()
endforeach
()
example/62_convnd_activ/binary/CMakeLists.txt
0 → 100644
View file @
bf98b476
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_custom_target
(
example_convnd_activ_binary_xdl
)
# Bilinear residual
add_example_executable
(
example_convnd_fwd_xdl_bilinear_residual_fp16 convnd_fwd_xdl_bilinear_residual_fp16.cpp
)
add_example_dependencies
(
example_convnd_activ_binary_xdl example_convnd_fwd_xdl_bilinear_residual_fp16
)
add_example_executable
(
example_convnd_bwd_data_xdl_bilinear_residual_fp16 convnd_bwd_data_xdl_bilinear_residual_fp16.cpp
)
add_example_dependencies
(
example_convnd_activ_binary_xdl example_convnd_bwd_data_xdl_bilinear_residual_fp16
)
set
(
target 1
)
endif
()
endforeach
()
example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp
0 → 100644
View file @
bf98b476
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <type_traits>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
constexpr
ck
::
index_t
NDimSpatial
=
3
;
using
InDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
CShuffleDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
GNDHWC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKZYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNDHWK
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
Bilinear
;
static
constexpr
auto
ConvSpec
=
ck
::
tensor_operation
::
device
::
ConvolutionBackwardDataSpecialization
::
Default
;
template
<
typename
OutElementOp
>
using
DeviceGroupedConvNDBwdDataInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
<
NDimSpatial
,
OutLayout
,
WeiLayout
,
ck
::
Tuple
<
InLayout
>
,
InLayout
,
OutDataType
,
WeiDataType
,
AccDataType
,
CShuffleDataType
,
ck
::
Tuple
<
InDataType
>
,
InDataType
,
OutElementOp
,
WeiElementOp
,
InElementOp
,
ConvSpec
,
// ConvForwardSpecialization
true
,
true
,
1
,
//
256
,
// BlockSize
128
,
// MPerBlock
256
,
// NPerBlock
32
,
// KPerBlock
8
,
// AK1
2
,
// BK1
32
,
// MPerXdl
32
,
// NPerXdl
2
,
// MXdlPerWave
4
,
// NXdlPerWave
S
<
4
,
64
,
1
>
,
// ABlockTransferThreadClusterLengths_AK0_M_AK1
S
<
1
,
0
,
2
>
,
// ABlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// ABlockTransferSrcAccessOrder
2
,
// ABlockTransferSrcVectorDim
8
,
// ABlockTransferSrcScalarPerVector
8
,
// ABlockTransferDstScalarPerVector_AK1
1
,
// ABlockLdsExtraM
S
<
4
,
64
,
1
>
,
// BBlockTransferThreadClusterLengths_BK0_N_BK1
S
<
0
,
2
,
1
>
,
// BBlockTransferThreadClusterArrangeOrder
S
<
0
,
2
,
1
>
,
// BBlockTransferSrcAccessOrder
1
,
// BBlockTransferSrcVectorDim
4
,
// BBlockTransferSrcScalarPerVector
2
,
// BBlockTransferDstScalarPerVector_BK1
0
,
// BBlockLdsExtraN
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDBwdDataInstance
<
OutElementOp
>
;
namespace
{
// Use custom implementation to pass two more tensors for post op
template
<
ck
::
index_t
NDimSpatial
,
typename
InDataType
,
typename
WeiDataType
,
typename
OutDataType
,
typename
InElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
DeviceConvNDInstance
>
bool
run_grouped_conv
(
bool
do_verification
,
int
init_method
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
InElementOp
&
in_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
{
constexpr
ck
::
index_t
NumDs
=
1
;
Tensor
<
OutDataType
>
out
(
out_g_n_k_wos_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
InDataType
>
in_host
(
in_g_n_c_wis_desc
);
std
::
cout
<<
"out: "
<<
out
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"in: "
<<
in_host
.
mDesc
<<
std
::
endl
;
switch
(
init_method
)
{
case
0
:
break
;
case
1
:
out
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
in_host
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
5
,
5
});
break
;
default:
out
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
0.0
,
1.0
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
in_host
.
GenerateTensorValue
(
GeneratorTensor_3
<
InDataType
>
{
0.0
,
1.0
});
}
// Initialize based on out_host
Tensor
<
InDataType
>
in_device
(
in_host
);
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in_device
.
mDesc
.
GetElementSpaceSize
());
out_device_buf
.
ToDevice
(
out
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
in_device_buf
.
ToDevice
(
in_device
.
mData
.
data
());
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_k_wos_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_k_wos_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_c_wis_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_c_wis_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
out_g_n_k_wos_desc
.
GetLengths
(),
a_g_n_k_wos_lengths
);
copy
(
out_g_n_k_wos_desc
.
GetStrides
(),
a_g_n_k_wos_strides
);
copy
(
wei_g_k_c_xs_desc
.
GetLengths
(),
b_g_k_c_xs_lengths
);
copy
(
wei_g_k_c_xs_desc
.
GetStrides
(),
b_g_k_c_xs_strides
);
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
e_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
e_g_n_c_wis_strides
);
copy
(
conv_param
.
conv_filter_strides_
,
conv_filter_strides
);
copy
(
conv_param
.
conv_filter_dilations_
,
conv_filter_dilations
);
copy
(
conv_param
.
input_left_pads_
,
input_left_pads
);
copy
(
conv_param
.
input_right_pads_
,
input_right_pads
);
// Use output as D
const
std
::
array
<
const
void
*
,
NumDs
>
ds
=
{
in_device_buf
.
GetDeviceBuffer
()};
auto
conv
=
DeviceConvNDInstance
{};
auto
invoker
=
conv
.
MakeInvoker
();
auto
argument
=
conv
.
MakeArgument
(
out_device_buf
.
GetDeviceBuffer
(),
wei_device_buf
.
GetDeviceBuffer
(),
ds
,
in_device_buf
.
GetDeviceBuffer
(),
a_g_n_k_wos_lengths
,
a_g_n_k_wos_strides
,
b_g_k_c_xs_lengths
,
b_g_k_c_xs_strides
,
std
::
array
<
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
,
NumDs
>
{
e_g_n_c_wis_lengths
},
std
::
array
<
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
,
NumDs
>
{
e_g_n_c_wis_strides
},
e_g_n_c_wis_lengths
,
e_g_n_c_wis_strides
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
out_element_op
,
wei_element_op
,
in_element_op
);
if
(
!
conv
.
IsSupportedArgument
(
argument
))
{
throw
std
::
runtime_error
(
"The device op with the specified compilation parameters does "
"not support this convolution problem."
);
}
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
conv_param
.
GetFlops
()
+
3
*
conv_param
.
GetInputByte
<
InDataType
>
()
/
sizeof
(
InDataType
);
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
()
+
conv_param
.
GetOutputByte
<
InDataType
>
();
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
std
::
cout
<<
"Perf: "
<<
avg_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
conv
.
GetTypeString
()
<<
std
::
endl
;
if
(
do_verification
)
{
std
::
array
<
Tensor
<
OutDataType
>
,
NumDs
>
d_tensors
=
{
in_host
};
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvBwdData
<
NDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
0
,
/*Num A Elementwise Tensors*/
0
,
/*Num B Elementwise Tensors*/
NumDs
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in_host
,
wei
,
out
,
conv_param
.
conv_filter_strides_
,
conv_param
.
conv_filter_dilations_
,
conv_param
.
input_left_pads_
,
conv_param
.
input_right_pads_
,
in_element_op
,
wei_element_op
,
out_element_op
,
{},
{},
d_tensors
);
ref_invoker
.
Run
(
ref_argument
);
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
in_host
.
mData
);
}
return
true
;
}
}
// namespace
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp
0 → 100644
View file @
bf98b476
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <type_traits>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
constexpr
ck
::
index_t
NDimSpatial
=
3
;
using
InDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
CShuffleDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
GNDHWC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKZYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNDHWK
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
Bilinear
;
static
constexpr
auto
ConvSpec
=
ck
::
tensor_operation
::
device
::
ConvolutionForwardSpecialization
::
Default
;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
template
<
typename
OutElementOp
>
using
DeviceGroupedConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle
<
NDimSpatial
,
InLayout
,
WeiLayout
,
ck
::
Tuple
<
OutLayout
>
,
OutLayout
,
InDataType
,
WeiDataType
,
AccDataType
,
CShuffleDataType
,
ck
::
Tuple
<
OutDataType
>
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
ConvSpec
,
// ConvForwardSpecialization
GemmSpec
,
// GemmSpecialization
1
,
//
256
,
// BlockSize
128
,
// MPerBlock
256
,
// NPerBlock
32
,
// KPerBlock
8
,
// AK1
8
,
// BK1
32
,
// MPerXdl
32
,
// NPerXdl
2
,
// MXdlPerWave
4
,
// NXdlPerWave
S
<
4
,
64
,
1
>
,
// ABlockTransferThreadClusterLengths_AK0_M_AK1
S
<
1
,
0
,
2
>
,
// ABlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// ABlockTransferSrcAccessOrder
2
,
// ABlockTransferSrcVectorDim
8
,
// ABlockTransferSrcScalarPerVector
8
,
// ABlockTransferDstScalarPerVector_AK1
1
,
// ABlockLdsExtraM
S
<
4
,
64
,
1
>
,
// BBlockTransferThreadClusterLengths_BK0_N_BK1
S
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder
2
,
// BBlockTransferSrcVectorDim
8
,
// BBlockTransferSrcScalarPerVector
8
,
// BBlockTransferDstScalarPerVector_BK1
1
,
// BBlockLdsExtraN
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
namespace
{
// Use custom implementation to pass two more tensors for post op
template
<
ck
::
index_t
NDimSpatial
,
typename
InDataType
,
typename
WeiDataType
,
typename
OutDataType
,
typename
InElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
DeviceConvNDFwdInstance
>
bool
run_grouped_conv
(
bool
do_verification
,
int
init_method
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
InElementOp
&
in_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
{
constexpr
ck
::
index_t
NumDs
=
1
;
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
OutDataType
>
out_host
(
out_g_n_k_wos_desc
);
std
::
cout
<<
"in: "
<<
in
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"out: "
<<
out_host
.
mDesc
<<
std
::
endl
;
switch
(
init_method
)
{
case
0
:
break
;
case
1
:
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
2
,
2
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
2
,
2
});
out_host
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
2
,
2
});
break
;
default:
in
.
GenerateTensorValue
(
GeneratorTensor_3
<
InDataType
>
{
-
1.0
,
1.0
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
2
,
2
});
out_host
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
-
0.05
,
0.05
});
}
// Initialize based on out_host
Tensor
<
OutDataType
>
out_device
(
out_host
);
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out_device
.
mDesc
.
GetElementSpaceSize
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
out_device_buf
.
ToDevice
(
out_device
.
mData
.
data
());
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_c_wis_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_c_wis_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_k_wos_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_k_wos_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
const
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
a_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
a_g_n_c_wis_strides
);
copy
(
wei_g_k_c_xs_desc
.
GetLengths
(),
b_g_k_c_xs_lengths
);
copy
(
wei_g_k_c_xs_desc
.
GetStrides
(),
b_g_k_c_xs_strides
);
copy
(
out_g_n_k_wos_desc
.
GetLengths
(),
e_g_n_k_wos_lengths
);
copy
(
out_g_n_k_wos_desc
.
GetStrides
(),
e_g_n_k_wos_strides
);
copy
(
conv_param
.
conv_filter_strides_
,
conv_filter_strides
);
copy
(
conv_param
.
conv_filter_dilations_
,
conv_filter_dilations
);
copy
(
conv_param
.
input_left_pads_
,
input_left_pads
);
copy
(
conv_param
.
input_right_pads_
,
input_right_pads
);
// Use output as D
const
std
::
array
<
const
void
*
,
NumDs
>
ds
=
{
out_device_buf
.
GetDeviceBuffer
()};
auto
conv
=
DeviceConvNDFwdInstance
{};
auto
invoker
=
conv
.
MakeInvoker
();
auto
argument
=
conv
.
MakeArgument
(
in_device_buf
.
GetDeviceBuffer
(),
wei_device_buf
.
GetDeviceBuffer
(),
ds
,
out_device_buf
.
GetDeviceBuffer
(),
a_g_n_c_wis_lengths
,
a_g_n_c_wis_strides
,
b_g_k_c_xs_lengths
,
b_g_k_c_xs_strides
,
std
::
array
<
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
,
NumDs
>
{
e_g_n_k_wos_lengths
},
std
::
array
<
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
,
NumDs
>
{
e_g_n_k_wos_strides
},
e_g_n_k_wos_lengths
,
e_g_n_k_wos_strides
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
in_element_op
,
wei_element_op
,
out_element_op
);
if
(
!
conv
.
IsSupportedArgument
(
argument
))
{
throw
std
::
runtime_error
(
"The device op with the specified compilation parameters does "
"not support this convolution problem."
);
}
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
conv_param
.
GetFlops
()
+
3
*
conv_param
.
GetOutputByte
<
OutDataType
>
()
/
sizeof
(
OutDataType
);
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
()
+
conv_param
.
GetOutputByte
<
OutDataType
>
();
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
avg_time
;
std
::
cout
<<
"Perf: "
<<
avg_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
conv
.
GetTypeString
()
<<
std
::
endl
;
if
(
do_verification
)
{
std
::
array
<
Tensor
<
OutDataType
>
,
NumDs
>
d_tensors
=
{
out_host
};
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
0
,
/*Num A Elementwise Tensors*/
0
,
/*Num B Elementwise Tensors*/
NumDs
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in
,
wei
,
out_host
,
conv_param
.
conv_filter_strides_
,
conv_param
.
conv_filter_dilations_
,
conv_param
.
input_left_pads_
,
conv_param
.
input_right_pads_
,
in_element_op
,
wei_element_op
,
out_element_op
,
{},
{},
d_tensors
);
ref_invoker
.
Run
(
ref_argument
);
out_device_buf
.
FromDevice
(
out_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
out_device
,
out_host
,
"Error: incorrect results!"
);
}
return
true
;
}
}
// namespace
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_conv
_fw
d_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp
→
example/62_conv
n
d_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm>
#include <algorithm>
#include <cstdlib>
#include <cstdlib>
...
@@ -97,7 +97,7 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -97,7 +97,7 @@ using DeviceGroupedConvNDFwdInstance =
S
<
1
,
32
,
1
,
8
>
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
8
>
;
using
DeviceGroupedConvND
Fwd
ActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
namespace
{
namespace
{
// Use custom implementation to pass two more tensors for post op
// Use custom implementation to pass two more tensors for post op
...
@@ -109,16 +109,16 @@ template <ck::index_t NDimSpatial,
...
@@ -109,16 +109,16 @@ template <ck::index_t NDimSpatial,
typename
WeiElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
OutElementOp
,
typename
DeviceConvNDFwdInstance
>
typename
DeviceConvNDFwdInstance
>
bool
run_grouped_conv
_fwd
(
bool
do_verification
,
bool
run_grouped_conv
(
bool
do_verification
,
int
init_method
,
int
init_method
,
bool
time_kernel
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
InElementOp
&
in_element_op
,
const
InElementOp
&
in_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
const
OutElementOp
&
out_element_op
)
{
{
constexpr
ck
::
index_t
NumDs
=
2
;
constexpr
ck
::
index_t
NumDs
=
2
;
const
ck
::
index_t
G
=
out_g_n_k_wos_desc
.
GetLengths
()[
0
];
const
ck
::
index_t
G
=
out_g_n_k_wos_desc
.
GetLengths
()[
0
];
...
@@ -289,6 +289,6 @@ bool run_grouped_conv_fwd(bool do_verification,
...
@@ -289,6 +289,6 @@ bool run_grouped_conv_fwd(bool do_verification,
}
// namespace
}
// namespace
#include "run_convnd_
fwd_
activ_example.inc"
#include "run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_
fwd_
example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_conv
_fw
d_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
→
example/62_conv
n
d_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <cstdlib>
#include <iostream>
#include <iostream>
...
@@ -94,7 +94,7 @@ using DeviceGroupedConvNDFwdInstance =
...
@@ -94,7 +94,7 @@ using DeviceGroupedConvNDFwdInstance =
S
<
1
,
32
,
1
,
8
>
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
8
>
;
using
DeviceGroupedConvND
Fwd
ActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
namespace
{
namespace
{
// Use custom implementation to pass two more tensors for post op
// Use custom implementation to pass two more tensors for post op
...
@@ -106,16 +106,16 @@ template <ck::index_t NDimSpatial,
...
@@ -106,16 +106,16 @@ template <ck::index_t NDimSpatial,
typename
WeiElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
OutElementOp
,
typename
DeviceConvNDFwdInstance
>
typename
DeviceConvNDFwdInstance
>
bool
run_grouped_conv
_fwd
(
bool
do_verification
,
bool
run_grouped_conv
(
bool
do_verification
,
int
init_method
,
int
init_method
,
bool
time_kernel
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
InElementOp
&
in_element_op
,
const
InElementOp
&
in_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
const
OutElementOp
&
out_element_op
)
{
{
constexpr
ck
::
index_t
NumDs
=
2
;
constexpr
ck
::
index_t
NumDs
=
2
;
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
...
@@ -265,6 +265,6 @@ bool run_grouped_conv_fwd(bool do_verification,
...
@@ -265,6 +265,6 @@ bool run_grouped_conv_fwd(bool do_verification,
}
// namespace
}
// namespace
#include "run_convnd_
fwd_
activ_example.inc"
#include "run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_
fwd_
example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_convnd_activ/multi_AB/CMakeLists.txt
0 → 100644
View file @
bf98b476
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_custom_target
(
example_convnd_activ_multi_ab_xdl
)
# ScaleAdd on A and B
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_fp16 conv_fwd_xdl_scaleadd_ab_fp16.cpp
)
add_example_dependencies
(
example_convnd_activ_multi_ab_xdl example_conv_fwd_xdl_scaleadd_ab_fp16
)
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_fp32 conv_fwd_xdl_scaleadd_ab_fp32.cpp
)
add_example_dependencies
(
example_convnd_activ_multi_ab_xdl example_conv_fwd_xdl_scaleadd_ab_fp32
)
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_bf16 conv_fwd_xdl_scaleadd_ab_bf16.cpp
)
add_example_dependencies
(
example_convnd_activ_multi_ab_xdl example_conv_fwd_xdl_scaleadd_ab_bf16
)
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_int8 conv_fwd_xdl_scaleadd_ab_int8.cpp
)
add_example_dependencies
(
example_convnd_activ_multi_ab_xdl example_conv_fwd_xdl_scaleadd_ab_int8
)
set
(
target 1
)
endif
()
endforeach
()
example/62_conv
_fw
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp
→
example/62_conv
n
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_multi_ab_common.hpp"
#include "convnd_fwd_activ_multi_ab_common.hpp"
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
DeviceGroupedConvND
Fwd
ActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
AccDataType
,
AccDataType
,
ADataTypes
,
ADataTypes
,
BDataTypes
,
BDataTypes
,
InElementOp
,
InElementOp
,
WeiElementOp
>
;
WeiElementOp
>
;
#include "../run_convnd_
fwd_
activ_example.inc"
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_
fwd_
example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_conv
_fw
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp
→
example/62_conv
n
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_multi_ab_common.hpp"
#include "convnd_fwd_activ_multi_ab_common.hpp"
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
DeviceGroupedConvND
Fwd
ActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
AccDataType
,
AccDataType
,
ADataTypes
,
ADataTypes
,
BDataTypes
,
BDataTypes
,
InElementOp
,
InElementOp
,
WeiElementOp
>
;
WeiElementOp
>
;
#include "../run_convnd_
fwd_
activ_example.inc"
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_
fwd_
example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_conv
_fw
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp
→
example/62_conv
n
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_multi_ab_common.hpp"
#include "convnd_fwd_activ_multi_ab_common.hpp"
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
DeviceGroupedConvND
Fwd
ActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
AccDataType
,
AccDataType
,
ADataTypes
,
ADataTypes
,
BDataTypes
,
BDataTypes
,
InElementOp
,
InElementOp
,
WeiElementOp
>
;
WeiElementOp
>
;
#include "../run_convnd_
fwd_
activ_example.inc"
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_
fwd_
example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_conv
_fw
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp
→
example/62_conv
n
d_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_multi_ab_common.hpp"
#include "convnd_fwd_activ_multi_ab_common.hpp"
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
...
@@ -14,13 +14,13 @@ using BDataTypes = ck::Tuple<DataType, DataType>;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
ScaleAdd
;
using
DeviceGroupedConvND
Fwd
ActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDMultiABFwdInstance
<
DataType
,
AccDataType
,
AccDataType
,
ADataTypes
,
ADataTypes
,
BDataTypes
,
BDataTypes
,
InElementOp
,
InElementOp
,
WeiElementOp
>
;
WeiElementOp
>
;
#include "../run_convnd_
fwd_
activ_example.inc"
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_
fwd_
example
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
example/62_conv
_fw
d_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp
→
example/62_conv
n
d_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <cstdlib>
#include <iostream>
#include <iostream>
...
@@ -100,16 +100,16 @@ template <ck::index_t NDimSpatial,
...
@@ -100,16 +100,16 @@ template <ck::index_t NDimSpatial,
typename
WeiElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
OutElementOp
,
typename
DeviceConvNDFwdInstance
>
typename
DeviceConvNDFwdInstance
>
bool
run_grouped_conv
_fwd
(
bool
do_verification
,
bool
run_grouped_conv
(
bool
do_verification
,
int
init_method
,
int
init_method
,
bool
time_kernel
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
InElementOp
&
in_element_op
,
const
InElementOp
&
in_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
const
OutElementOp
&
out_element_op
)
{
{
constexpr
ck
::
index_t
NumAs
=
2
;
constexpr
ck
::
index_t
NumAs
=
2
;
constexpr
ck
::
index_t
NumBs
=
2
;
constexpr
ck
::
index_t
NumBs
=
2
;
...
...
example/62_conv
_fw
d_activ/run_convnd_
fwd_
activ_example.inc
→
example/62_conv
n
d_activ/run_convnd_activ_example.inc
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -11,7 +11,7 @@ void print_helper_msg()
...
@@ -11,7 +11,7 @@ void print_helper_msg()
<<
ck
::
utils
::
conv
::
get_conv_param_parser_helper_msg
()
<<
std
::
endl
;
<<
ck
::
utils
::
conv
::
get_conv_param_parser_helper_msg
()
<<
std
::
endl
;
}
}
bool
run_convnd_
fwd_
example
(
int
argc
,
char
*
argv
[])
bool
run_convnd_example
(
int
argc
,
char
*
argv
[])
{
{
print_helper_msg
();
print_helper_msg
();
...
@@ -63,23 +63,23 @@ bool run_convnd_fwd_example(int argc, char* argv[])
...
@@ -63,23 +63,23 @@ bool run_convnd_fwd_example(int argc, char* argv[])
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
conv_param
);
conv_param
);
return
run_grouped_conv
_fwd
<
NDimSpatial
,
return
run_grouped_conv
<
NDimSpatial
,
InDataType
,
InDataType
,
WeiDataType
,
WeiDataType
,
OutDataType
,
OutDataType
,
InElementOp
,
InElementOp
,
WeiElementOp
,
WeiElementOp
,
OutElementOp
,
OutElementOp
,
DeviceGroupedConvND
Fwd
ActivInstance
>
(
do_verification
,
DeviceGroupedConvNDActivInstance
>
(
do_verification
,
init_method
,
init_method
,
time_kernel
,
time_kernel
,
conv_param
,
conv_param
,
in_g_n_c_wis_desc
,
in_g_n_c_wis_desc
,
wei_g_k_c_xs_desc
,
wei_g_k_c_xs_desc
,
out_g_n_k_wos_desc
,
out_g_n_k_wos_desc
,
in_element_op
,
in_element_op
,
wei_element_op
,
wei_element_op
,
out_element_op
);
out_element_op
);
};
};
if
(
conv_param
.
num_dim_spatial_
==
3
)
if
(
conv_param
.
num_dim_spatial_
==
3
)
...
...
example/62_conv
_fw
d_activ/CMakeLists.txt
→
example/62_conv
n
d_activ/
unary/
CMakeLists.txt
View file @
bf98b476
...
@@ -2,48 +2,34 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
...
@@ -2,48 +2,34 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
add_custom_target
(
example_convnd_
fwd_
activ_xdl
)
add_custom_target
(
example_convnd_activ_
unary_
xdl
)
# Sigmoid
# Sigmoid
add_example_executable
(
example_convnd_fwd_xdl_sigmoid_fp16 convnd_fwd_xdl_sigmoid_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_sigmoid_fp16 convnd_fwd_xdl_sigmoid_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_sigmoid_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_sigmoid_fp16
)
# Tanh
# Tanh
add_example_executable
(
example_convnd_fwd_xdl_tanh_fp16 convnd_fwd_xdl_tanh_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_tanh_fp16 convnd_fwd_xdl_tanh_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_tanh_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_tanh_fp16
)
# Relu
# Relu
add_example_executable
(
example_convnd_fwd_xdl_relu_fp16 convnd_fwd_xdl_relu_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_relu_fp16 convnd_fwd_xdl_relu_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_relu_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_relu_fp16
)
# SoftRelu
# SoftRelu
add_example_executable
(
example_convnd_fwd_xdl_softrelu_fp16 convnd_fwd_xdl_softrelu_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_softrelu_fp16 convnd_fwd_xdl_softrelu_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_softrelu_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_softrelu_fp16
)
# Abs
# Abs
add_example_executable
(
example_convnd_fwd_xdl_abs_fp16 convnd_fwd_xdl_abs_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_abs_fp16 convnd_fwd_xdl_abs_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_abs_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_abs_fp16
)
# Pow
# Pow
add_example_executable
(
example_convnd_fwd_xdl_pow_fp16 convnd_fwd_xdl_pow_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_pow_fp16 convnd_fwd_xdl_pow_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_pow_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_pow_fp16
)
# Clipped Relu
# Clipped Relu
add_example_executable
(
example_convnd_fwd_xdl_clippedrelu_fp16 convnd_fwd_xdl_clippedrelu_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_clippedrelu_fp16 convnd_fwd_xdl_clippedrelu_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_clippedrelu_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_clippedrelu_fp16
)
# Leaky Relu
# Leaky Relu
add_example_executable
(
example_convnd_fwd_xdl_leakyrelu_fp16 convnd_fwd_xdl_leakyrelu_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_leakyrelu_fp16 convnd_fwd_xdl_leakyrelu_fp16.cpp
)
add_example_dependencies
(
example_convnd_
fwd_
activ_xdl example_convnd_fwd_xdl_leakyrelu_fp16
)
add_example_dependencies
(
example_convnd_activ_
unary_
xdl example_convnd_fwd_xdl_leakyrelu_fp16
)
# Elu
# Elu
add_example_executable
(
example_convnd_fwd_xdl_elu_fp16 convnd_fwd_xdl_elu_fp16.cpp
)
add_example_executable
(
example_convnd_fwd_xdl_elu_fp16 convnd_fwd_xdl_elu_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_convnd_fwd_xdl_elu_fp16
)
add_example_dependencies
(
example_convnd_activ_unary_xdl example_convnd_fwd_xdl_elu_fp16
)
# ScaleAdd on A and B
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_fp16 multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_conv_fwd_xdl_scaleadd_ab_fp16
)
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_fp32 multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_conv_fwd_xdl_scaleadd_ab_fp32
)
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_bf16 multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_conv_fwd_xdl_scaleadd_ab_bf16
)
add_example_executable
(
example_conv_fwd_xdl_scaleadd_ab_int8 multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_conv_fwd_xdl_scaleadd_ab_int8
)
# ScaleAdd ScaleAdd Relu
add_example_executable
(
example_convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16 convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16
)
add_example_executable
(
example_convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16 convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16
)
set
(
target 1
)
set
(
target 1
)
endif
()
endif
()
endforeach
()
endforeach
()
example/62_conv
_fw
d_activ/convnd_fwd_activ_common.hpp
→
example/62_conv
n
d_activ/
unary/
convnd_fwd_activ_
unary_
common.hpp
View file @
bf98b476
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -102,16 +102,16 @@ template <ck::index_t NDimSpatial,
...
@@ -102,16 +102,16 @@ template <ck::index_t NDimSpatial,
typename
WeiElementOp
,
typename
WeiElementOp
,
typename
OutElementOp
,
typename
OutElementOp
,
typename
DeviceConvNDFwdInstance
>
typename
DeviceConvNDFwdInstance
>
bool
run_grouped_conv
_fwd
(
bool
do_verification
,
bool
run_grouped_conv
(
bool
do_verification
,
int
init_method
,
int
init_method
,
bool
time_kernel
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
InElementOp
&
in_element_op
,
const
InElementOp
&
in_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
OutElementOp
&
out_element_op
)
const
OutElementOp
&
out_element_op
)
{
{
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
InDataType
>
in
(
in_g_n_c_wis_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
...
...
example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp
0 → 100644
View file @
bf98b476
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "convnd_fwd_activ_unary_common.hpp"
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
UnaryAbs
;
using
DeviceGroupedConvNDActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
#include "../run_convnd_activ_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
!
run_convnd_example
(
argc
,
argv
);
}
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment