Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
926fd388
Commit
926fd388
authored
Nov 02, 2022
by
Chao Liu
Committed by
GitHub
Nov 02, 2022
Browse files
Revert "Add client example of grouped conv2d backward data (data type: fp16) (#481)"
This reverts commit
9e57a290
.
parent
79aa3fb1
Changes
15
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
241 additions
and
1002 deletions
+241
-1002
client_example/09_grouped_conv2d_bwd_data/CMakeLists.txt
client_example/09_grouped_conv2d_bwd_data/CMakeLists.txt
+0
-2
client_example/09_grouped_conv2d_bwd_data/grouped_conv2d_bwd_data.cpp
...le/09_grouped_conv2d_bwd_data/grouped_conv2d_bwd_data.cpp
+0
-218
example/38_grouped_conv_bwd_data_bias_relu/CMakeLists.txt
example/38_grouped_conv_bwd_data_bias_relu/CMakeLists.txt
+1
-0
example/38_grouped_conv_bwd_data_bias_relu/grouped_conv_bwd_data_bias_relu_common.hpp
...data_bias_relu/grouped_conv_bwd_data_bias_relu_common.hpp
+66
-91
example/38_grouped_conv_bwd_data_bias_relu/grouped_conv_bwd_data_bias_relu_fp16.cpp
...d_data_bias_relu/grouped_conv_bwd_data_bias_relu_fp16.cpp
+174
-0
example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt
example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt
+0
-7
example/38_grouped_conv_bwd_data_multiple_d/common.hpp
example/38_grouped_conv_bwd_data_multiple_d/common.hpp
+0
-102
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_fp16.cpp
..._data_multiple_d/grouped_conv_bwd_data_bias_relu_fp16.cpp
+0
-33
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_fp16.cpp
...d_conv_bwd_data_multiple_d/grouped_conv_bwd_data_fp16.cpp
+0
-33
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
...bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
+0
-190
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp
...sor_operation/gpu/device/device_grouped_conv_bwd_data.hpp
+0
-49
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
...on/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
+0
-95
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp
...ration_instance/gpu/grouped_convolution_backward_data.hpp
+0
-82
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt
...ation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt
+0
-3
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
...ed_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
+0
-97
No files found.
client_example/09_grouped_conv2d_bwd_data/CMakeLists.txt
deleted
100644 → 0
View file @
79aa3fb1
add_executable
(
client_grouped_conv2d_bwd_data grouped_conv2d_bwd_data.cpp
)
target_link_libraries
(
client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_operations
)
client_example/09_grouped_conv2d_bwd_data/grouped_conv2d_bwd_data.cpp
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_conv_fwd.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using
InDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
2
;
static
constexpr
ck
::
index_t
G
=
32
;
static
constexpr
ck
::
index_t
N
=
256
;
static
constexpr
ck
::
index_t
K
=
192
;
static
constexpr
ck
::
index_t
C
=
192
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
Hi
=
28
;
static
constexpr
ck
::
index_t
Wi
=
28
;
static
constexpr
ck
::
index_t
Ho
=
28
;
static
constexpr
ck
::
index_t
Wo
=
28
;
struct
SimpleDeviceMem
{
SimpleDeviceMem
()
=
delete
;
SimpleDeviceMem
(
std
::
size_t
mem_size
)
:
p_mem_
{}
{
(
void
)
hipMalloc
(
static_cast
<
void
**>
(
&
p_mem_
),
mem_size
);
}
void
*
GetDeviceBuffer
()
{
return
p_mem_
;
}
~
SimpleDeviceMem
()
{
(
void
)
hipFree
(
p_mem_
);
}
void
*
p_mem_
;
};
int
main
()
{
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
in_lengths
{
G
,
N
,
Hi
,
Wi
,
C
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
in_strides
{
0
,
0
,
0
,
0
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
wei_lengths
{
G
,
K
,
Y
,
X
,
C
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
wei_strides
{
0
,
0
,
0
,
0
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
out_lengths
{
G
,
N
,
Ho
,
Wo
,
K
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
out_strides
{
0
,
0
,
0
,
0
,
1
};
std
::
partial_sum
(
rbegin
(
in_lengths
),
std
::
prev
(
rend
(
in_lengths
)),
std
::
next
(
rbegin
(
in_strides
)),
std
::
multiplies
<>
{});
std
::
partial_sum
(
rbegin
(
wei_lengths
),
std
::
prev
(
rend
(
wei_lengths
)),
std
::
next
(
rbegin
(
wei_strides
)),
std
::
multiplies
<>
{});
std
::
partial_sum
(
rbegin
(
out_lengths
),
std
::
prev
(
rend
(
out_lengths
)),
std
::
next
(
rbegin
(
out_strides
)),
std
::
multiplies
<>
{});
// transpose GNHWC/GKYXC/GNHWK to GNCHW/GKCYX/GNCHW
std
::
rotate
(
rbegin
(
in_lengths
),
std
::
next
(
rbegin
(
in_lengths
)),
std
::
next
(
rbegin
(
in_lengths
),
3
));
std
::
rotate
(
rbegin
(
in_strides
),
std
::
next
(
rbegin
(
in_strides
)),
std
::
next
(
rbegin
(
in_strides
),
3
));
std
::
rotate
(
rbegin
(
wei_lengths
),
std
::
next
(
rbegin
(
wei_lengths
)),
std
::
next
(
rbegin
(
wei_lengths
),
3
));
std
::
rotate
(
rbegin
(
wei_strides
),
std
::
next
(
rbegin
(
wei_strides
)),
std
::
next
(
rbegin
(
wei_strides
),
3
));
std
::
rotate
(
rbegin
(
out_lengths
),
std
::
next
(
rbegin
(
out_lengths
)),
std
::
next
(
rbegin
(
out_lengths
),
3
));
std
::
rotate
(
rbegin
(
out_strides
),
std
::
next
(
rbegin
(
out_strides
)),
std
::
next
(
rbegin
(
out_strides
),
3
));
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
filter_strides
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
filter_dilations
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_left_pads
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_right_pads
{
1
,
1
};
SimpleDeviceMem
in
(
sizeof
(
InDataType
)
*
G
*
N
*
Hi
*
Wi
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
G
*
N
*
Ho
*
Wo
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvBwdData
<
NumDimSpatial
,
InLayout
,
WeiLayout
,
OutLayout
,
InDataType
,
WeiDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
PassThrough
>
;
// get device op instances
const
auto
op_ptrs
=
ck
::
tensor_operation
::
device
::
instance
::
DeviceOperationInstanceFactory
<
DeviceOp
>::
GetInstances
();
std
::
cout
<<
"found "
<<
op_ptrs
.
size
()
<<
" instances"
<<
std
::
endl
;
std
::
string
best_op_name
;
int
best_op_id
=
-
1
;
float
best_avg_time
=
std
::
numeric_limits
<
float
>::
max
();
float
best_gb_per_sec
=
0
;
float
best_tflops
=
0
;
// profile device operation instances
std
::
cout
<<
"Run all instances and do timing"
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
op_ptrs
.
size
();
++
i
)
{
auto
&
op_ptr
=
op_ptrs
[
i
];
auto
argument_ptr
=
op_ptr
->
MakeArgumentPointer
(
in
.
GetDeviceBuffer
(),
wei
.
GetDeviceBuffer
(),
out
.
GetDeviceBuffer
(),
in_lengths
,
in_strides
,
wei_lengths
,
wei_strides
,
out_lengths
,
out_strides
,
filter_strides
,
filter_dilations
,
input_left_pads
,
input_right_pads
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
auto
invoker_ptr
=
op_ptr
->
MakeInvokerPointer
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
true
});
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
G
*
N
*
K
*
C
*
Ho
*
Wo
*
Y
*
X
;
std
::
size_t
num_bytes
=
sizeof
(
InDataType
)
*
G
*
N
*
Hi
*
Wi
*
C
+
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
+
sizeof
(
OutDataType
)
*
G
*
N
*
Ho
*
Wo
*
K
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
gb_per_sec
=
num_bytes
/
1.E6
/
avg_time
;
std
::
cout
<<
"Perf: "
<<
std
::
setw
(
10
)
<<
avg_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s, "
<<
op_name
<<
std
::
endl
;
if
(
tflops
>
best_tflops
)
{
best_op_id
=
i
;
best_op_name
=
op_name
;
best_avg_time
=
avg_time
;
best_gb_per_sec
=
gb_per_sec
;
best_tflops
=
tflops
;
}
}
else
{
std
::
cerr
<<
op_name
<<
" does not support this problem"
<<
std
::
endl
;
}
}
if
(
best_op_id
<
0
)
{
std
::
cerr
<<
"no suitable instance"
<<
std
::
endl
;
return
EXIT_FAILURE
;
}
std
::
cout
<<
"Best Perf: "
<<
std
::
setw
(
10
)
<<
best_avg_time
<<
" ms, "
<<
best_tflops
<<
" TFlops, "
<<
best_gb_per_sec
<<
" GB/s, "
<<
best_op_name
<<
std
::
endl
;
// run the best intance
{
auto
&
op_ptr
=
op_ptrs
[
best_op_id
];
std
::
cout
<<
"Run the best instance without timing: "
<<
op_ptr
->
GetTypeString
()
<<
std
::
endl
;
auto
argument_ptr
=
op_ptr
->
MakeArgumentPointer
(
in
.
GetDeviceBuffer
(),
wei
.
GetDeviceBuffer
(),
out
.
GetDeviceBuffer
(),
in_lengths
,
in_strides
,
wei_lengths
,
wei_strides
,
out_lengths
,
out_strides
,
filter_strides
,
filter_dilations
,
input_left_pads
,
input_right_pads
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
auto
invoker_ptr
=
op_ptr
->
MakeInvokerPointer
();
if
(
op_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
false
});
}
std
::
cout
<<
"Done"
<<
std
::
endl
;
}
}
example/38_grouped_conv_bwd_data_bias_relu/CMakeLists.txt
0 → 100644
View file @
926fd388
add_example_executable
(
example_grouped_conv_bwd_data_bias_relu_fp16 grouped_conv_bwd_data_bias_relu_fp16.cpp
)
example/38_grouped_conv_bwd_data_
multiple_d/run_
grouped_conv_bwd_data_bias_relu_
example.inc
→
example/38_grouped_conv_bwd_data_
bias_relu/
grouped_conv_bwd_data_bias_relu_
common.hpp
View file @
926fd388
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
bool
run_conv_bwd_data_bias_relu
(
const
ExecutionConfig
&
config
,
#include <iostream>
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
,
#include <numeric>
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
#include <initializer_list>
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
#include <cstdlib>
const
HostTensorDescriptor
&
bias_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
#include "ck/ck.hpp"
const
OutElementOp
&
out_element_op
,
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
const
WeiElementOp
&
wei_element_op
,
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
const
InElementOp
&
in_element_op
)
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp"
void
print_helper_msg
()
{
std
::
cout
<<
"arg1: verification (0=no, 1=yes)
\n
"
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
<<
"arg3: time kernel (0=no, 1=yes)
\n
"
<<
ck
::
utils
::
conv
::
get_conv_param_parser_helper_msg
()
<<
std
::
endl
;
}
template
<
ck
::
index_t
NDimSpatial
,
typename
OutDataType
,
typename
WeiDataType
,
typename
BiasDataType
,
typename
InDataType
,
typename
OutElementOp
,
typename
WeiElementOp
,
typename
InElementOp
,
typename
DeviceInstance
>
int
run_conv_bwd_data_bias_relu
(
bool
do_verification
,
int
init_method
,
bool
time_kernel
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_param
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
bias_g_n_c_wis_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
OutElementOp
&
out_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
InElementOp
&
in_element_op
)
{
{
Tensor
<
OutDataType
>
out
(
out_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
out
(
out_g_n_k_wos_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
...
@@ -22,7 +58,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
...
@@ -22,7 +58,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
std
::
cout
<<
"bias: "
<<
bias
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"bias: "
<<
bias
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"in: "
<<
in_host
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"in: "
<<
in_host
.
mDesc
<<
std
::
endl
;
switch
(
config
.
init_method
)
switch
(
init_method
)
{
{
case
0
:
break
;
case
0
:
break
;
case
1
:
case
1
:
...
@@ -71,15 +107,13 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
...
@@ -71,15 +107,13 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
copy
(
bias_g_n_c_wis_desc
.
GetStrides
(),
d0_g_n_c_wis_strides
);
copy
(
bias_g_n_c_wis_desc
.
GetStrides
(),
d0_g_n_c_wis_strides
);
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
e_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
e_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
e_g_n_c_wis_strides
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
e_g_n_c_wis_strides
);
copy
(
conv_params
.
conv_filter_strides_
,
conv_filter_strides
);
copy
(
conv_param
.
conv_filter_strides_
,
conv_filter_strides
);
copy
(
conv_params
.
conv_filter_dilations_
,
conv_filter_dilations
);
copy
(
conv_param
.
conv_filter_dilations_
,
conv_filter_dilations
);
copy
(
conv_params
.
input_left_pads_
,
input_left_pads
);
copy
(
conv_param
.
input_left_pads_
,
input_left_pads
);
copy
(
conv_params
.
input_right_pads_
,
input_right_pads
);
copy
(
conv_param
.
input_right_pads_
,
input_right_pads
);
static_assert
(
std
::
is_default_constructible_v
<
DeviceConvInstance
>
);
// do conv
// do conv
auto
conv
=
Device
Conv
Instance
{};
auto
conv
=
DeviceInstance
{};
auto
invoker
=
conv
.
MakeInvoker
();
auto
invoker
=
conv
.
MakeInvoker
();
auto
argument
=
conv
.
MakeArgument
(
auto
argument
=
conv
.
MakeArgument
(
out_device_buf
.
GetDeviceBuffer
(),
out_device_buf
.
GetDeviceBuffer
(),
...
@@ -104,17 +138,16 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
...
@@ -104,17 +138,16 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
if
(
!
conv
.
IsSupportedArgument
(
argument
))
if
(
!
conv
.
IsSupportedArgument
(
argument
))
{
{
std
::
cerr
<<
"wrong! device_conv with the specified compilation parameters does "
printf
(
"wrong! device_conv with the specified compilation parameters does "
"not support this Conv problem"
"not support this Conv problem
\n
"
);
<<
std
::
endl
;
return
false
;
return
1
;
}
}
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
conv_param
s
.
GetFlops
();
std
::
size_t
flop
=
conv_param
.
GetFlops
();
std
::
size_t
num_btype
=
conv_param
s
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
();
std
::
size_t
num_btype
=
conv_param
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
();
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
...
@@ -123,8 +156,10 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
...
@@ -123,8 +156,10 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s"
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
<<
std
::
endl
;
if
(
config
.
do_verification
)
if
(
do_verification
)
{
{
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
// c doesn't physically exist, any layout is fine
// c doesn't physically exist, any layout is fine
Tensor
<
float
>
c_host
(
in_g_n_c_wis_desc
);
Tensor
<
float
>
c_host
(
in_g_n_c_wis_desc
);
...
@@ -141,10 +176,10 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
...
@@ -141,10 +176,10 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
c_host
,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
c_host
,
wei
,
wei
,
out
,
out
,
conv_param
s
.
conv_filter_strides_
,
conv_param
.
conv_filter_strides_
,
conv_param
s
.
conv_filter_dilations_
,
conv_param
.
conv_filter_dilations_
,
conv_param
s
.
input_left_pads_
,
conv_param
.
input_left_pads_
,
conv_param
s
.
input_right_pads_
,
conv_param
.
input_right_pads_
,
PassThrough
{},
PassThrough
{},
wei_element_op
,
wei_element_op
,
out_element_op
);
out_element_op
);
...
@@ -157,68 +192,8 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
...
@@ -157,68 +192,8 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
in_host
.
mData
);
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
in_host
.
mData
)
?
0
:
1
;
}
return
true
;
}
int
run_grouped_conv_bwd_data_bias_relu_example
(
int
argc
,
char
*
argv
[])
{
namespace
ctc
=
ck
::
tensor_layout
::
convolution
;
ExecutionConfig
config
;
ck
::
utils
::
conv
::
ConvParam
conv_params
=
DefaultConvParams
;
if
(
!
parse_cmd_args
(
argc
,
argv
,
config
,
conv_params
))
{
return
EXIT_FAILURE
;
}
const
auto
in_element_op
=
InElementOp
{};
const
auto
wei_element_op
=
WeiElementOp
{};
const
auto
out_element_op
=
OutElementOp
{};
if
(
conv_params
.
num_dim_spatial_
!=
NDimSpatial
)
{
std
::
cerr
<<
"unsupported # of spatials dimensions"
<<
std
::
endl
;
return
EXIT_FAILURE
;
}
}
// output image: GNHWK
return
0
;
const
auto
out_g_n_k_wos_desc
=
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
conv_params
);
// weight: GKYXC
const
auto
wei_g_k_c_xs_desc
=
ck
::
utils
::
conv
::
make_weight_host_tensor_descriptor_g_k_c_xs_packed
<
WeiLayout
>
(
conv_params
);
// input image bias: G_C
const
auto
bias_g_n_c_wis_desc
=
HostTensorDescriptor
({
conv_params
.
G_
,
conv_params
.
N_
,
conv_params
.
C_
,
conv_params
.
input_spatial_lengths_
[
0
],
conv_params
.
input_spatial_lengths_
[
1
]},
{
conv_params
.
C_
,
// g
0
,
// n
1
,
// c
0
,
// hi
0
// wi
});
// input image: GNHWC
const
auto
in_g_n_c_wis_desc
=
ck
::
utils
::
conv
::
make_input_host_tensor_descriptor_g_n_c_wis_packed
<
InLayout
>
(
conv_params
);
return
!
run_conv_bwd_data_bias_relu
(
config
,
conv_params
,
out_g_n_k_wos_desc
,
wei_g_k_c_xs_desc
,
bias_g_n_c_wis_desc
,
in_g_n_c_wis_desc
,
wei_element_op
,
out_element_op
,
in_element_op
);
}
}
example/38_grouped_conv_bwd_data_bias_relu/grouped_conv_bwd_data_bias_relu_fp16.cpp
0 → 100644
View file @
926fd388
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "grouped_conv_bwd_data_bias_relu_common.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
OutDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
using
CShuffleDataType
=
ck
::
half_t
;
using
BiasDataType
=
ck
::
half_t
;
// bias
using
InDataType
=
ck
::
half_t
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
BiasLayout
=
ck
::
tensor_layout
::
convolution
::
G_C
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWC
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
CBiasInElementOp
=
ck
::
tensor_operation
::
element_wise
::
AddRelu
;
static
constexpr
auto
ConvBwdDataDefault
=
ck
::
tensor_operation
::
device
::
ConvolutionBackwardDataSpecialization
::
Default
;
template
<
ck
::
index_t
NDimSpatial
>
using
DeviceConvNdBwdDataInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
<
NDimSpatial
,
OutLayout
,
WeiLayout
,
ck
::
Tuple
<
BiasLayout
>
,
InLayout
,
OutDataType
,
WeiDataType
,
AccDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
>
,
InDataType
,
OutElementOp
,
WeiElementOp
,
CBiasInElementOp
,
ConvBwdDataDefault
,
true
,
// DoPadGemmM
true
,
// DoPadGemmN
1
,
256
,
128
,
256
,
32
,
8
,
2
,
32
,
32
,
2
,
4
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
1
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
2
,
0
,
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
int
main
(
int
argc
,
char
*
argv
[])
{
namespace
ctc
=
ck
::
tensor_layout
::
convolution
;
print_helper_msg
();
bool
do_verification
=
true
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
ck
::
utils
::
conv
::
ConvParam
conv_param
{
2
,
2
,
128
,
256
,
256
,
{
3
,
3
},
{
14
,
14
},
{
2
,
2
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}};
if
(
argc
==
1
)
{
// use default
}
else
if
(
argc
==
4
)
{
do_verification
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
time_kernel
=
std
::
stoi
(
argv
[
3
]);
}
else
{
do_verification
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
time_kernel
=
std
::
stoi
(
argv
[
3
]);
const
ck
::
index_t
num_dim_spatial
=
std
::
stoi
(
argv
[
4
]);
conv_param
=
ck
::
utils
::
conv
::
parse_conv_param
(
num_dim_spatial
,
5
,
argv
);
}
const
auto
in_element_op
=
CBiasInElementOp
{};
const
auto
wei_element_op
=
WeiElementOp
{};
const
auto
out_element_op
=
OutElementOp
{};
if
(
conv_param
.
num_dim_spatial_
==
2
)
{
// output image: GNHWK
const
auto
out_g_n_k_wos_desc
=
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
conv_param
);
// weight: GKYXC
const
auto
wei_g_k_c_xs_desc
=
ck
::
utils
::
conv
::
make_weight_host_tensor_descriptor_g_k_c_xs_packed
<
WeiLayout
>
(
conv_param
);
// input image bias: G_C
const
auto
bias_g_n_c_wis_desc
=
HostTensorDescriptor
({
conv_param
.
G_
,
conv_param
.
N_
,
conv_param
.
C_
,
conv_param
.
input_spatial_lengths_
[
0
],
conv_param
.
input_spatial_lengths_
[
1
]},
{
conv_param
.
C_
,
// g
0
,
// n
1
,
// c
0
,
// hi
0
// wi
});
// input image: GNHWC
const
auto
in_g_n_c_wis_desc
=
ck
::
utils
::
conv
::
make_input_host_tensor_descriptor_g_n_c_wis_packed
<
InLayout
>
(
conv_param
);
using
DeviceInstance
=
DeviceConvNdBwdDataInstance
<
2
>
;
run_conv_bwd_data_bias_relu
<
2
,
OutDataType
,
WeiDataType
,
BiasDataType
,
InDataType
,
OutElementOp
,
WeiElementOp
,
CBiasInElementOp
,
DeviceInstance
>
(
do_verification
,
init_method
,
time_kernel
,
conv_param
,
out_g_n_k_wos_desc
,
wei_g_k_c_xs_desc
,
bias_g_n_c_wis_desc
,
in_g_n_c_wis_desc
,
wei_element_op
,
out_element_op
,
in_element_op
);
}
return
0
;
}
example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt
deleted
100644 → 0
View file @
79aa3fb1
add_custom_target
(
example_grouped_conv_bwd_data
)
add_example_executable
(
example_grouped_conv_bwd_data_fp16 grouped_conv_bwd_data_fp16.cpp
)
add_example_executable
(
example_grouped_conv_bwd_data_bias_relu_fp16 grouped_conv_bwd_data_bias_relu_fp16.cpp
)
add_dependencies
(
example_grouped_conv_bwd_data example_grouped_conv_bwd_data_fp16
)
add_dependencies
(
example_grouped_conv_bwd_data example_grouped_conv_bwd_data_bias_relu_fp16
)
example/38_grouped_conv_bwd_data_multiple_d/common.hpp
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
static
inline
constexpr
ck
::
index_t
NDimSpatial
=
2
;
static
constexpr
auto
ConvBwdDataDefault
=
ck
::
tensor_operation
::
device
::
ConvolutionBackwardDataSpecialization
::
Default
;
using
FP16
=
ck
::
half_t
;
using
FP32
=
float
;
struct
ExecutionConfig
final
{
bool
do_verification
=
true
;
int
init_method
=
1
;
bool
time_kernel
=
true
;
};
#define DefaultConvParams \
ck::utils::conv::ConvParam \
{ \
NDimSpatial, 32, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, { 1, 1 } \
}
inline
void
print_help_msg
()
{
std
::
cerr
<<
"arg1: verification (0=no, 1=yes)
\n
"
<<
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
<<
"arg3: time kernel (0=no, 1=yes)
\n
"
<<
ck
::
utils
::
conv
::
get_conv_param_parser_helper_msg
()
<<
std
::
endl
;
}
inline
bool
parse_cmd_args
(
int
argc
,
char
*
argv
[],
ExecutionConfig
&
config
,
ck
::
utils
::
conv
::
ConvParam
&
conv_params
)
{
constexpr
int
num_execution_config_args
=
3
;
// arguments for do_verification, init_method, time_kernel
constexpr
int
num_conv_param_leading_args
=
5
;
// arguments for num_dim_spatial_, G_, N_, K_, C_
constexpr
int
threshold_to_catch_partial_args
=
1
+
num_execution_config_args
;
constexpr
int
threshold_to_catch_all_args
=
threshold_to_catch_partial_args
+
num_conv_param_leading_args
;
if
(
argc
==
1
)
{
// use default
config
=
ExecutionConfig
{};
}
// catch only ExecutionConfig arguments
else
if
(
argc
==
threshold_to_catch_partial_args
)
{
config
.
do_verification
=
std
::
stoi
(
argv
[
1
]);
config
.
init_method
=
std
::
stoi
(
argv
[
2
]);
config
.
time_kernel
=
std
::
stoi
(
argv
[
3
]);
}
// catch both ExecutionConfig & ConvParam arguments
else
if
(
threshold_to_catch_all_args
<
argc
&&
((
argc
-
threshold_to_catch_all_args
)
%
3
==
0
))
{
config
.
do_verification
=
std
::
stoi
(
argv
[
1
]);
config
.
init_method
=
std
::
stoi
(
argv
[
2
]);
config
.
time_kernel
=
std
::
stoi
(
argv
[
3
]);
const
ck
::
index_t
num_dim_spatial
=
std
::
stoi
(
argv
[
4
]);
conv_params
=
ck
::
utils
::
conv
::
parse_conv_param
(
num_dim_spatial
,
threshold_to_catch_partial_args
,
argv
);
}
else
{
print_help_msg
();
return
false
;
}
return
true
;
}
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_fp16.cpp
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
using
OutDataType
=
FP16
;
using
WeiDataType
=
FP16
;
using
AccDataType
=
FP32
;
using
CShuffleDataType
=
FP16
;
using
BiasDataType
=
FP16
;
// bias
using
InDataType
=
FP16
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
BiasLayout
=
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
G_C
>
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWC
;
using
OutElementOp
=
PassThrough
;
using
WeiElementOp
=
PassThrough
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
AddRelu
;
// clang-format off
using
DeviceConvInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
// ######| NDimSpatial| ALayout| BLayout| DsLayout| ELayout| AData| BData| AccData| CShuffle| DsData| EData| AElementwise| BElementwise| CDEElementwise| ConvolutionBackward| DoPad| DoPad| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffleMXdl| CShuffleNXdl| CDEBlockTransfer| CDEBlockTransfer|
// ######| | | | | | Type| Type| Type| DataType| Type| Type| Operation| Operation| Operation| DataSpecialization| GemmM| GemmN| PrefetchStage| Size| Block| Block| Block| | | XDL| XDL| PerWave| PerWave| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ExtraN| PerWave| PerWave| _MBlock_MPerBlock| ScalarPerVector|
// ######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | Lengths_AK0_M_AK1| ArrangeOrder| | | PerVector| PerVector_AK1| | Lengths_BK0_N_BK1| ArrangeOrder| | | PerVector| PerVector_BK1| | PerShuffle| PerShuffle| _NBlock_NPerBlock| _NPerBlock|
// ######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
<
NDimSpatial
,
OutLayout
,
WeiLayout
,
BiasLayout
,
InLayout
,
OutDataType
,
WeiDataType
,
AccDataType
,
CShuffleDataType
,
ck
::
Tuple
<
BiasDataType
>
,
InDataType
,
OutElementOp
,
WeiElementOp
,
InElementOp
,
ConvBwdDataDefault
,
true
,
true
,
1
,
256
,
128
,
256
,
32
,
8
,
2
,
32
,
32
,
2
,
4
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
1
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
2
,
0
,
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
// clang-format on
#include "run_grouped_conv_bwd_data_bias_relu_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
run_grouped_conv_bwd_data_bias_relu_example
(
argc
,
argv
);
}
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_fp16.cpp
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
using
OutDataType
=
FP16
;
using
WeiDataType
=
FP16
;
using
AccDataType
=
FP32
;
using
CShuffleDataType
=
FP16
;
using
DsDataType
=
ck
::
Tuple
<>
;
using
InDataType
=
FP16
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
DsLayout
=
ck
::
Tuple
<>
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWC
;
using
OutElementOp
=
PassThrough
;
using
WeiElementOp
=
PassThrough
;
using
InElementOp
=
PassThrough
;
// clang-format off
using
DeviceConvInstance
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
// ######| NDimSpatial| ALayout| BLayout| DsLayout| ELayout| AData| BData| AccData| CShuffle| DsData| EData| AElementwise| BElementwise| CDEElementwise| ConvolutionBackward| DoPad| DoPad| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffleMXdl| CShuffleNXdl| CDEBlockTransfer| CDEBlockTransfer|
// ######| | | | | | Type| Type| Type| DataType| Type| Type| Operation| Operation| Operation| DataSpecialization| GemmM| GemmN| PrefetchStage| Size| Block| Block| Block| | | XDL| XDL| PerWave| PerWave| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ExtraN| PerWave| PerWave| _MBlock_MPerBlock| ScalarPerVector|
// ######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | Lengths_AK0_M_AK1| ArrangeOrder| | | PerVector| PerVector_AK1| | Lengths_BK0_N_BK1| ArrangeOrder| | | PerVector| PerVector_BK1| | PerShuffle| PerShuffle| _NBlock_NPerBlock| _NPerBlock|
// ######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
<
NDimSpatial
,
OutLayout
,
WeiLayout
,
DsLayout
,
InLayout
,
OutDataType
,
WeiDataType
,
AccDataType
,
CShuffleDataType
,
DsDataType
,
InDataType
,
OutElementOp
,
WeiElementOp
,
InElementOp
,
ConvBwdDataDefault
,
true
,
true
,
1
,
256
,
128
,
256
,
32
,
8
,
2
,
32
,
32
,
2
,
4
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
1
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
2
,
0
,
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
// clang-format on
#include "run_grouped_conv_bwd_data_example.inc"
int
main
(
int
argc
,
char
*
argv
[])
{
return
run_grouped_conv_bwd_data_example
(
argc
,
argv
);
}
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
bool
run_conv_bwd_data
(
const
ExecutionConfig
&
config
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
,
const
HostTensorDescriptor
&
out_g_n_k_wos_desc
,
const
HostTensorDescriptor
&
wei_g_k_c_xs_desc
,
const
HostTensorDescriptor
&
in_g_n_c_wis_desc
,
const
OutElementOp
&
out_element_op
,
const
WeiElementOp
&
wei_element_op
,
const
InElementOp
&
in_element_op
)
{
Tensor
<
OutDataType
>
out
(
out_g_n_k_wos_desc
);
Tensor
<
WeiDataType
>
wei
(
wei_g_k_c_xs_desc
);
Tensor
<
InDataType
>
in_host
(
in_g_n_c_wis_desc
);
Tensor
<
InDataType
>
in_device
(
in_g_n_c_wis_desc
);
std
::
cout
<<
"out: "
<<
out
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei: "
<<
wei
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"in: "
<<
in_host
.
mDesc
<<
std
::
endl
;
switch
(
config
.
init_method
)
{
case
0
:
break
;
case
1
:
out
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
break
;
default
:
out
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
0.0
,
1.0
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
}
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in_device
.
mDesc
.
GetElementSpaceSize
());
out_device_buf
.
ToDevice
(
out
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
// reset input to zero
in_device_buf
.
SetZero
();
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_k_wos_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_k_wos_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_c_wis_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_c_wis_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
(),
y
.
begin
());
};
copy
(
out_g_n_k_wos_desc
.
GetLengths
(),
a_g_n_k_wos_lengths
);
copy
(
out_g_n_k_wos_desc
.
GetStrides
(),
a_g_n_k_wos_strides
);
copy
(
wei_g_k_c_xs_desc
.
GetLengths
(),
b_g_k_c_xs_lengths
);
copy
(
wei_g_k_c_xs_desc
.
GetStrides
(),
b_g_k_c_xs_strides
);
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
e_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
e_g_n_c_wis_strides
);
copy
(
conv_params
.
conv_filter_strides_
,
conv_filter_strides
);
copy
(
conv_params
.
conv_filter_dilations_
,
conv_filter_dilations
);
copy
(
conv_params
.
input_left_pads_
,
input_left_pads
);
copy
(
conv_params
.
input_right_pads_
,
input_right_pads
);
static_assert
(
std
::
is_default_constructible_v
<
DeviceConvInstance
>
);
// do conv
auto
conv
=
DeviceConvInstance
{};
auto
invoker
=
conv
.
MakeInvoker
();
auto
argument
=
conv
.
MakeArgument
(
out_device_buf
.
GetDeviceBuffer
(),
wei_device_buf
.
GetDeviceBuffer
(),
std
::
array
<
const
void
*
,
0
>
{},
in_device_buf
.
GetDeviceBuffer
(),
a_g_n_k_wos_lengths
,
a_g_n_k_wos_strides
,
b_g_k_c_xs_lengths
,
b_g_k_c_xs_strides
,
std
::
array
<
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
,
0
>
{},
std
::
array
<
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
,
0
>
{},
e_g_n_c_wis_lengths
,
e_g_n_c_wis_strides
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
out_element_op
,
wei_element_op
,
in_element_op
);
if
(
!
conv
.
IsSupportedArgument
(
argument
))
{
std
::
cerr
<<
"wrong! device_conv with the specified compilation parameters does "
"not support this Conv problem"
<<
std
::
endl
;
return
false
;
}
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
std
::
size_t
flop
=
conv_params
.
GetFlops
();
std
::
size_t
num_btype
=
conv_params
.
GetByte
<
InDataType
,
WeiDataType
,
OutDataType
>
();
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
if
(
config
.
do_verification
)
{
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvBwdData
<
NDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
PassThrough
,
WeiElementOp
,
OutElementOp
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in_host
,
wei
,
out
,
conv_params
.
conv_filter_strides_
,
conv_params
.
conv_filter_dilations_
,
conv_params
.
input_left_pads_
,
conv_params
.
input_right_pads_
,
PassThrough
{},
wei_element_op
,
out_element_op
);
ref_invoker
.
Run
(
ref_argument
);
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
in_host
.
mData
);
}
return
true
;
}
int
run_grouped_conv_bwd_data_example
(
int
argc
,
char
*
argv
[])
{
namespace
ctc
=
ck
::
tensor_layout
::
convolution
;
ExecutionConfig
config
;
ck
::
utils
::
conv
::
ConvParam
conv_params
=
DefaultConvParams
;
if
(
!
parse_cmd_args
(
argc
,
argv
,
config
,
conv_params
))
{
return
EXIT_FAILURE
;
}
const
auto
in_element_op
=
InElementOp
{};
const
auto
wei_element_op
=
WeiElementOp
{};
const
auto
out_element_op
=
OutElementOp
{};
if
(
conv_params
.
num_dim_spatial_
!=
NDimSpatial
)
{
std
::
cerr
<<
"unsupported # of spatials dimensions"
<<
std
::
endl
;
return
EXIT_FAILURE
;
}
// output image: GNHWK
const
auto
out_g_n_k_wos_desc
=
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
conv_params
);
// weight: GKYXC
const
auto
wei_g_k_c_xs_desc
=
ck
::
utils
::
conv
::
make_weight_host_tensor_descriptor_g_k_c_xs_packed
<
WeiLayout
>
(
conv_params
);
// input image: GNHWC
const
auto
in_g_n_c_wis_desc
=
ck
::
utils
::
conv
::
make_input_host_tensor_descriptor_g_n_c_wis_packed
<
InLayout
>
(
conv_params
);
return
!
run_conv_bwd_data
(
config
,
conv_params
,
out_g_n_k_wos_desc
,
wei_g_k_c_xs_desc
,
in_g_n_c_wis_desc
,
wei_element_op
,
out_element_op
,
in_element_op
);
}
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <array>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
template
<
ck
::
index_t
NDimSpatial
,
typename
InputLayout
,
typename
WeightLayout
,
typename
OutputLayout
,
typename
InputDataType
,
typename
WeightDataType
,
typename
OutputDataType
,
typename
InputElementwiseOperation
,
typename
WeightElementwiseOperation
,
typename
OutputElementwiseOperation
>
struct
DeviceGroupedConvBwdData
:
public
BaseOperator
{
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
void
*
p_input
,
const
void
*
p_weight
,
const
void
*
p_output
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
InputElementwiseOperation
&
input_element_op
,
const
WeightElementwiseOperation
&
weight_element_op
,
const
OutputElementwiseOperation
&
output_element_op
)
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
View file @
926fd388
...
@@ -6,7 +6,6 @@
...
@@ -6,7 +6,6 @@
#include <vector>
#include <vector>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -63,100 +62,6 @@ struct DeviceGroupedConvBwdDataMultipleD : public BaseOperator
...
@@ -63,100 +62,6 @@ struct DeviceGroupedConvBwdDataMultipleD : public BaseOperator
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
};
template
<
ck
::
index_t
NDimSpatial
,
typename
ALayout
,
typename
BLayout
,
typename
ELayout
,
typename
ADataType
,
typename
BDataType
,
typename
EDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CDEElementwiseOperation
>
struct
DeviceGroupedConvBwdDataMultipleD
<
NDimSpatial
,
ALayout
,
BLayout
,
Tuple
<>
,
ELayout
,
ADataType
,
BDataType
,
Tuple
<>
,
EDataType
,
AElementwiseOperation
,
BElementwiseOperation
,
CDEElementwiseOperation
>
:
public
DeviceGroupedConvBwdData
<
NDimSpatial
,
ELayout
,
BLayout
,
ALayout
,
EDataType
,
BDataType
,
ADataType
,
CDEElementwiseOperation
,
BElementwiseOperation
,
AElementwiseOperation
>
{
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_a
,
// output image
const
void
*
p_b
,
// weight
const
std
::
array
<
const
void
*
,
0
>&
,
// bias
void
*
p_e
,
// input image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_k_wos_lengths
,
// output image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_k_wos_strides
,
// output image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
// weight
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_strides
,
// weight
const
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>&
,
// bias
const
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>&
,
// bias
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
e_g_n_c_wis_lengths
,
// input image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
e_g_n_c_wis_strides
,
// input image
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
AElementwiseOperation
&
a_element_op
,
const
BElementwiseOperation
&
b_element_op
,
const
CDEElementwiseOperation
&
cde_element_op
)
=
0
;
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
void
*
p_input
,
const
void
*
p_weight
,
const
void
*
p_output
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
CDEElementwiseOperation
&
input_element_op
,
const
BElementwiseOperation
&
weight_element_op
,
const
AElementwiseOperation
&
output_element_op
)
override
final
{
return
MakeArgumentPointer
(
p_output
,
p_weight
,
std
::
array
<
const
void
*
,
0
>
{},
p_input
,
output_g_n_k_wos_lengths
,
output_g_n_k_wos_strides
,
weight_g_k_c_xs_lengths
,
weight_g_k_c_xs_strides
,
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>
{},
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>
{},
input_g_n_c_wis_lengths
,
input_g_n_c_wis_strides
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
output_element_op
,
weight_element_op
,
input_element_op
);
}
};
}
// namespace device
}
// namespace device
}
// namespace tensor_operation
}
// namespace tensor_operation
}
// namespace ck
}
// namespace ck
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp
deleted
100644 → 0
View file @
79aa3fb1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
// conv2d backward data
void
add_device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvBwdData
<
2
,
GNHWC
,
GKYXC
,
GNHWK
,
F16
,
F16
,
F16
,
PassThrough
,
PassThrough
,
PassThrough
>>>&
instances
);
template
<
ck
::
index_t
NumDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
,
typename
InDataType
,
typename
WeiDataType
,
typename
OutDataType
>
struct
DeviceOperationInstanceFactory
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvBwdData
<
NumDimSpatial
,
InLayout
,
WeiLayout
,
OutLayout
,
InDataType
,
WeiDataType
,
OutDataType
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
>>
{
using
DeviceOp
=
DeviceGroupedConvBwdData
<
NumDimSpatial
,
InLayout
,
WeiLayout
,
OutLayout
,
InDataType
,
WeiDataType
,
OutDataType
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
>
;
static
auto
GetInstances
()
{
std
::
vector
<
std
::
unique_ptr
<
DeviceOp
>>
op_ptrs
;
if
constexpr
(
NumDimSpatial
==
2
&&
is_same_v
<
InLayout
,
GNHWC
>
&&
is_same_v
<
WeiLayout
,
GKYXC
>
&&
is_same_v
<
OutLayout
,
GNHWK
>
)
{
if
constexpr
(
is_same_v
<
InDataType
,
F16
>
&&
is_same_v
<
WeiDataType
,
F16
>
&&
is_same_v
<
OutDataType
,
F16
>
)
{
add_device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instances
(
op_ptrs
);
}
}
return
op_ptrs
;
}
};
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt
deleted
100644 → 0
View file @
79aa3fb1
add_instance_library
(
device_grouped_conv2d_bwd_data_instance
device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
)
library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
deleted
100644 → 0
View file @
79aa3fb1
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment