Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4698993d
Unverified
Commit
4698993d
authored
Nov 15, 2022
by
Po Yen Chen
Committed by
GitHub
Nov 15, 2022
Browse files
Merge branch 'develop' into wmma_op
parents
ab663329
7038723a
Changes
202
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
104 additions
and
252 deletions
+104
-252
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc
...ultiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc
+2
-2
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
...bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
+1
-1
example/39_permute/common.hpp
example/39_permute/common.hpp
+3
-15
example/39_permute/run_permute_bundle_example.inc
example/39_permute/run_permute_bundle_example.inc
+1
-1
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp
+1
-0
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp
+1
-0
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp
+1
-0
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
+1
-0
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp
..._grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp
+1
-0
example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc
...ouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc
+7
-11
example/42_groupnorm/groupnorm_sigmoid_fp16.cpp
example/42_groupnorm/groupnorm_sigmoid_fp16.cpp
+4
-4
example/44_conv2d_fwd_quant/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp
...t/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp
+2
-1
example/44_conv2d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp
...d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp
+2
-1
example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp
...le/44_elementwise_permute/elementwise_permute_4D_fp16.cpp
+2
-1
example/CMakeLists.txt
example/CMakeLists.txt
+2
-0
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp
...sor_operation/gpu/device/device_grouped_conv_bwd_data.hpp
+0
-49
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
...on/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
+1
-96
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp
...r_operation/gpu/device/device_grouped_conv_bwd_weight.hpp
+11
-10
include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp
...k/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp
+21
-22
include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...e_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+40
-38
No files found.
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc
View file @
4698993d
...
...
@@ -61,7 +61,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
()
,
y
.
begin
());
};
auto
copy
=
[](
const
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
out_g_n_k_wos_desc
.
GetLengths
(),
a_g_n_k_wos_lengths
);
copy
(
out_g_n_k_wos_desc
.
GetStrides
(),
a_g_n_k_wos_strides
);
...
...
@@ -157,7 +157,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
in_host
.
mData
);
return
ck
::
utils
::
check_err
(
in_device
,
in_host
);
}
return
true
;
...
...
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
View file @
4698993d
...
...
@@ -52,7 +52,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
()
,
y
.
begin
());
};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
out_g_n_k_wos_desc
.
GetLengths
(),
a_g_n_k_wos_lengths
);
copy
(
out_g_n_k_wos_desc
.
GetStrides
(),
a_g_n_k_wos_strides
);
...
...
example/39_permute/common.hpp
View file @
4698993d
...
...
@@ -19,6 +19,7 @@
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/utility/type.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/fill.hpp"
...
...
@@ -247,19 +248,6 @@ inline auto to_array(Range& range) noexcept
return
detail
::
to_array_proxy
<
ck
::
remove_cvref_t
<
Range
>>
{
range
};
}
namespace
ranges
{
template
<
typename
InputRange
,
typename
OutputIterator
>
inline
auto
copy
(
InputRange
&&
range
,
OutputIterator
iter
)
->
decltype
(
std
::
copy
(
std
::
begin
(
std
::
forward
<
InputRange
>
(
range
)),
std
::
end
(
std
::
forward
<
InputRange
>
(
range
)),
iter
))
{
return
std
::
copy
(
std
::
begin
(
std
::
forward
<
InputRange
>
(
range
)),
std
::
end
(
std
::
forward
<
InputRange
>
(
range
)),
iter
);
}
}
// namespace ranges
template
<
typename
Axes
>
inline
auto
is_valid_axes
(
const
Axes
&
axes
)
->
std
::
enable_if_t
<
detail
::
is_random_access_range_v
<
Axes
>
,
bool
>
...
...
@@ -350,7 +338,7 @@ auto extend_shape(const Problem::Shape& shape, std::size_t new_dim)
using
std
::
begin
,
std
::
end
;
std
::
copy
(
begin
(
shape
),
end
(
shape
)
,
begin
(
extended_shape
));
ck
::
ranges
::
copy
(
shape
,
begin
(
extended_shape
));
extended_shape
.
back
()
=
new_dim
;
return
extended_shape
;
...
...
@@ -362,7 +350,7 @@ auto extend_axes(const Problem::Axes& axes)
using
std
::
begin
,
std
::
end
;
std
::
copy
(
begin
(
axes
),
end
(
axes
)
,
begin
(
extended_axes
));
ck
::
ranges
::
copy
(
axes
,
begin
(
extended_axes
));
extended_axes
.
back
()
=
detail
::
get_array_size_v
<
Problem
::
Axes
>
;
return
extended_axes
;
...
...
example/39_permute/run_permute_bundle_example.inc
View file @
4698993d
...
...
@@ -57,7 +57,7 @@ bool run_permute_bundle(const Problem& problem)
using
std
::
begin
;
Tensor
<
DataType
>
input_tensor
(
input_shape
);
ranges
::
copy
(
input_bundle_tensor
.
AsSpan
<
const
DataType
>
(),
begin
(
input_tensor
));
ck
::
ranges
::
copy
(
input_bundle_tensor
.
AsSpan
<
const
DataType
>
(),
begin
(
input_tensor
));
Tensor
<
DataType
>
output_tensor
(
transpose
(
input_shape
,
input_axes
));
if
(
!
host_permute
(
input_tensor
,
input_axes
,
PassThrough
{},
output_tensor
))
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp
View file @
4698993d
...
...
@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp
View file @
4698993d
...
...
@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp
View file @
4698993d
...
...
@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp
View file @
4698993d
...
...
@@ -15,6 +15,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp
View file @
4698993d
...
...
@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc
View file @
4698993d
...
...
@@ -97,7 +97,7 @@ bool run_grouped_conv_conv_fwd(bool do_verification,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input1_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input1_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
()
,
y
.
begin
());
};
auto
copy
=
[](
const
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
in0_g_n_c_wis_desc
.
GetLengths
(),
a0_g_n_c_wis_lengths
);
copy
(
in0_g_n_c_wis_desc
.
GetStrides
(),
a0_g_n_c_wis_strides
);
...
...
@@ -120,18 +120,14 @@ bool run_grouped_conv_conv_fwd(bool do_verification,
const
ck
::
index_t
gemm_batch
=
a0_g_n_c_wis_lengths
[
0
];
const
ck
::
index_t
gemm0_m_length
=
e1_g_n_k_wos_lengths
[
1
]
*
std
::
accumulate
(
e1_g_n_k_wos_lengths
.
begin
()
+
3
,
e1_g_n_k_wos_lengths
.
begin
()
+
3
+
NDimSpatial
,
ck
::
index_t
{
1
},
std
::
multiplies
<
ck
::
index_t
>
{});
e1_g_n_k_wos_lengths
[
1
]
*
ck
::
accumulate_n
<
ck
::
index_t
>
(
e1_g_n_k_wos_lengths
.
begin
()
+
3
,
NDimSpatial
,
1
,
std
::
multiplies
<>
{});
const
ck
::
index_t
gemm0_n_length
=
b0_g_k_c_xs_lengths
[
1
];
const
ck
::
index_t
gemm0_k_length
=
std
::
accumulate
(
b0_g_k_c_xs_lengths
.
begin
()
+
2
,
b0_g_k_c_xs_lengths
.
begin
()
+
2
+
NDimSpatial
+
1
,
ck
::
index_t
{
1
},
std
::
multiplies
<
ck
::
index_t
>
{});
const
ck
::
index_t
gemm0_k_length
=
ck
::
accumulate_n
<
ck
::
index_t
>
(
b0_g_k_c_xs_lengths
.
begin
()
+
2
,
NDimSpatial
+
1
,
1
,
std
::
multiplies
<>
{});
const
ck
::
index_t
gemm1_n_length
=
b1_g_k_c_xs_lengths
[
1
];
...
...
@@ -261,7 +257,7 @@ bool run_grouped_conv_conv_fwd(bool do_verification,
#endif
return
ck
::
utils
::
check_err
(
out1_device
.
mData
,
out1_host
.
mData
,
"Error: incorrect results!"
,
1
e
-
5
f
,
1
e
-
4
f
);
out1_device
,
out1_host
,
"Error: incorrect results!"
,
1
e
-
5
f
,
1
e
-
4
f
);
}
return
true
;
...
...
example/42_groupnorm/groupnorm_sigmoid_fp16.cpp
View file @
4698993d
...
...
@@ -100,9 +100,9 @@ int main(int argc, char* argv[])
Tensor
<
GammaDataType
>
gamma
({
G
,
C
});
Tensor
<
BetaDataType
>
beta
({
G
,
C
});
ck
::
utils
::
FillUniformDistribution
<
XDataType
>
{
0.
f
,
1.
f
}(
x
.
begin
(),
x
.
end
()
);
ck
::
utils
::
FillUniformDistribution
<
GammaDataType
>
{
0.
f
,
1.
f
}(
gamma
.
begin
(),
gamma
.
end
()
);
ck
::
utils
::
FillUniformDistribution
<
BetaDataType
>
{
0.
f
,
1.
f
}(
beta
.
begin
(),
beta
.
end
()
);
ck
::
utils
::
FillUniformDistribution
<
XDataType
>
{
0.
f
,
1.
f
}(
x
);
ck
::
utils
::
FillUniformDistribution
<
GammaDataType
>
{
0.
f
,
1.
f
}(
gamma
);
ck
::
utils
::
FillUniformDistribution
<
BetaDataType
>
{
0.
f
,
1.
f
}(
beta
);
DeviceMem
x_dev
(
sizeof
(
XDataType
)
*
x
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
gamma_dev
(
sizeof
(
GammaDataType
)
*
gamma
.
mDesc
.
GetElementSpaceSize
());
...
...
@@ -167,7 +167,7 @@ int main(int argc, char* argv[])
ref_invoker
.
Run
(
ref_argument
);
y_dev
.
FromDevice
(
y
.
mData
.
data
());
pass
&=
ck
::
utils
::
check_err
(
y
.
mData
,
host_y
.
mData
,
"Error: Incorrect results"
,
1e-3
,
1e-3
);
pass
&=
ck
::
utils
::
check_err
(
y
,
host_y
,
"Error: Incorrect results"
,
1e-3
,
1e-3
);
}
return
(
pass
?
0
:
1
);
...
...
example/44_conv2d_fwd_quant/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp
View file @
4698993d
...
...
@@ -6,6 +6,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
@@ -144,7 +145,7 @@ bool run_grouped_conv_fwd(bool do_verification,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
()
,
y
.
begin
());
};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
a_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
a_g_n_c_wis_strides
);
...
...
example/44_conv2d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp
View file @
4698993d
...
...
@@ -6,6 +6,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
@@ -131,7 +132,7 @@ bool run_grouped_conv_fwd(bool do_verification,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
()
,
y
.
begin
());
};
auto
copy
=
[](
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
copy
(
in_g_n_c_wis_desc
.
GetLengths
(),
a_g_n_c_wis_lengths
);
copy
(
in_g_n_c_wis_desc
.
GetStrides
(),
a_g_n_c_wis_strides
);
...
...
example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp
View file @
4698993d
...
...
@@ -5,6 +5,7 @@
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
@@ -69,7 +70,7 @@ int main()
static_cast
<
int
>
(
nhwc
[
2
]
*
nhwc
[
3
]),
static_cast
<
int
>
(
nhwc
[
3
])};
std
::
copy
(
nchw
.
begin
(),
nchw
.
end
()
,
ab_lengths
.
begin
());
ck
::
ranges
::
copy
(
nchw
,
ab_lengths
.
begin
());
auto
broadcastPermute
=
DeviceElementwisePermuteInstance
{};
auto
argument
=
broadcastPermute
.
MakeArgumentPointer
(
...
...
example/CMakeLists.txt
View file @
4698993d
...
...
@@ -12,6 +12,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
add_test
(
NAME
${
EXAMPLE_NAME
}
COMMAND $<TARGET_FILE:
${
EXAMPLE_NAME
}
>
${
ARGN
}
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
add_dependencies
(
check
${
EXAMPLE_NAME
}
)
rocm_install
(
TARGETS
${
EXAMPLE_NAME
}
COMPONENT examples
)
endfunction
(
add_example_executable EXAMPLE_NAME
)
function
(
add_example_executable_no_testing EXAMPLE_NAME FILE_NAME
)
...
...
@@ -19,6 +20,7 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE utility
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
rocm_install
(
TARGETS
${
EXAMPLE_NAME
}
COMPONENT examples
)
endfunction
(
add_example_executable_no_testing EXAMPLE_NAME
)
# add all example subdir
...
...
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp
deleted
100644 → 0
View file @
ab663329
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <array>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
template
<
ck
::
index_t
NDimSpatial
,
typename
InputLayout
,
typename
WeightLayout
,
typename
OutputLayout
,
typename
InputDataType
,
typename
WeightDataType
,
typename
OutputDataType
,
typename
InputElementwiseOperation
,
typename
WeightElementwiseOperation
,
typename
OutputElementwiseOperation
>
struct
DeviceGroupedConvBwdData
:
public
BaseOperator
{
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
void
*
p_input
,
const
void
*
p_weight
,
const
void
*
p_output
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
InputElementwiseOperation
&
input_element_op
,
const
WeightElementwiseOperation
&
weight_element_op
,
const
OutputElementwiseOperation
&
output_element_op
)
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp
View file @
4698993d
...
...
@@ -3,10 +3,9 @@
#pragma once
#include <
vector
>
#include <
array
>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -63,100 +62,6 @@ struct DeviceGroupedConvBwdDataMultipleD : public BaseOperator
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
template
<
ck
::
index_t
NDimSpatial
,
typename
ALayout
,
typename
BLayout
,
typename
ELayout
,
typename
ADataType
,
typename
BDataType
,
typename
EDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CDEElementwiseOperation
>
struct
DeviceGroupedConvBwdDataMultipleD
<
NDimSpatial
,
ALayout
,
BLayout
,
Tuple
<>
,
ELayout
,
ADataType
,
BDataType
,
Tuple
<>
,
EDataType
,
AElementwiseOperation
,
BElementwiseOperation
,
CDEElementwiseOperation
>
:
public
DeviceGroupedConvBwdData
<
NDimSpatial
,
ELayout
,
BLayout
,
ALayout
,
EDataType
,
BDataType
,
ADataType
,
CDEElementwiseOperation
,
BElementwiseOperation
,
AElementwiseOperation
>
{
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_a
,
// output image
const
void
*
p_b
,
// weight
const
std
::
array
<
const
void
*
,
0
>&
,
// bias
void
*
p_e
,
// input image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_k_wos_lengths
,
// output image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_k_wos_strides
,
// output image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_lengths
,
// weight
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b_g_k_c_xs_strides
,
// weight
const
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>&
,
// bias
const
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>&
,
// bias
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
e_g_n_c_wis_lengths
,
// input image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
e_g_n_c_wis_strides
,
// input image
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
AElementwiseOperation
&
a_element_op
,
const
BElementwiseOperation
&
b_element_op
,
const
CDEElementwiseOperation
&
cde_element_op
)
=
0
;
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
void
*
p_input
,
const
void
*
p_weight
,
const
void
*
p_output
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
input_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
weight_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
output_g_n_k_wos_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
CDEElementwiseOperation
&
input_element_op
,
const
BElementwiseOperation
&
weight_element_op
,
const
AElementwiseOperation
&
output_element_op
)
override
final
{
return
MakeArgumentPointer
(
p_output
,
p_weight
,
std
::
array
<
const
void
*
,
0
>
{},
p_input
,
output_g_n_k_wos_lengths
,
output_g_n_k_wos_strides
,
weight_g_k_c_xs_lengths
,
weight_g_k_c_xs_strides
,
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>
{},
std
::
array
<
std
::
array
<
index_t
,
NDimSpatial
+
3
>
,
0
>
{},
input_g_n_c_wis_lengths
,
input_g_n_c_wis_strides
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
output_element_op
,
weight_element_op
,
input_element_op
);
}
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
include/ck/tensor_operation/gpu/device/device_conv_bwd_weight.hpp
→
include/ck/tensor_operation/gpu/device/device_
grouped_
conv_bwd_weight.hpp
View file @
4698993d
...
...
@@ -3,7 +3,7 @@
#pragma once
#include <
vector
>
#include <
array
>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
...
...
@@ -11,7 +11,7 @@ namespace ck {
namespace
tensor_operation
{
namespace
device
{
template
<
ck
::
index_t
N
um
DimSpatial
,
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
OutLayout
,
...
...
@@ -21,22 +21,23 @@ template <ck::index_t NumDimSpatial,
typename
InElementwiseOperation
,
typename
WeiElementwiseOperation
,
typename
OutElementwiseOperation
>
struct
DeviceConvBwdWeight
:
public
BaseOperator
struct
Device
Grouped
ConvBwdWeight
:
public
BaseOperator
{
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_in
,
void
*
p_wei
,
const
void
*
p_out
,
ck
::
index_t
G
,
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
filter_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
output_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
,
InElementwiseOperation
in_element_op
,
WeiElementwiseOperation
wei_element_op
,
OutElementwiseOperation
out_element_op
,
...
...
include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp
View file @
4698993d
...
...
@@ -14,39 +14,38 @@ namespace device {
// Convolution Forward:
// input : input image A[G, N, C, Hi, Wi],
// input : weight B[G, K, C, Y, X],
// input : D0[G, N, K, Ho, Wo], D1[G, N, K, Ho, Wo], ...
// output : output image E[G, N, K, Ho, Wo]
// C = a_op(A) * b_op(B)
// E = cde_op(C, D0, D1, ...)
template
<
index_t
NDimSpatial
,
typename
A
Layout
,
typename
B
Layout
,
typename
C
Layout
,
typename
A
DataType
,
typename
B
DataType
,
typename
C
DataType
,
typename
A
ElementwiseOperation
,
typename
B
ElementwiseOperation
,
typename
C
ElementwiseOperation
>
typename
In
Layout
,
typename
Wei
Layout
,
typename
Out
Layout
,
typename
In
DataType
,
typename
Wei
DataType
,
typename
Out
DataType
,
typename
In
ElementwiseOperation
,
typename
Wei
ElementwiseOperation
,
typename
Out
ElementwiseOperation
>
struct
DeviceGroupedConvFwd
:
public
BaseOperator
{
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_
a
,
// input image
const
void
*
p_
b
,
// weight
void
*
p_
c
,
// output image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a
_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a
_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b
_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
b
_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c
_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
c
_g_n_k_wos_strides
,
MakeArgumentPointer
(
const
void
*
p_
in
,
// input image
const
void
*
p_
wei
,
// weight
void
*
p_
out
,
// output image
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
in
_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
in
_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
wei
_g_k_c_xs_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
wei
_g_k_c_xs_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
out
_g_n_k_wos_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
out
_g_n_k_wos_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
conv_filter_dilations
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_left_pads
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_right_pads
,
const
A
ElementwiseOperation
&
a
_element_op
,
const
B
ElementwiseOperation
&
b
_element_op
,
const
C
ElementwiseOperation
&
c
_element_op
)
=
0
;
const
In
ElementwiseOperation
&
in
_element_op
,
const
Wei
ElementwiseOperation
&
wei
_element_op
,
const
Out
ElementwiseOperation
&
out
_element_op
)
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
...
...
include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
4698993d
...
...
@@ -67,6 +67,8 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
WeiElementwiseOperation
,
OutElementwiseOperation
>
{
static
constexpr
ck
::
index_t
NDimSpatial
=
2
;
using
DeviceOp
=
DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
;
...
...
@@ -107,18 +109,18 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
static
constexpr
auto
BBlockLdsN0PerBlock
=
NPerBlock
/
BBlockLdsN1PerBlock
;
static
constexpr
auto
BBlockLdsN1Padding
=
4
;
static
auto
MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N
(
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
,
ck
::
index_t
batch_k
)
static
auto
MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N
(
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
filter_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
output_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
,
ck
::
index_t
batch_k
)
{
using
namespace
ck
;
...
...
@@ -390,13 +392,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
filter_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
output_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
,
ck
::
index_t
M01
,
ck
::
index_t
N01
,
InElementwiseOperation
in_element_op
,
...
...
@@ -473,11 +475,11 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
index_t
Conv_N_
;
index_t
Conv_K_
;
index_t
Conv_C_
;
std
::
vector
<
index_t
>
output_spatial_lengths_
;
std
::
vector
<
index_t
>
filter_spatial_lengths_
;
std
::
vector
<
index_t
>
conv_filter_strides_
;
std
::
vector
<
index_t
>
input_left_pads_
;
std
::
vector
<
index_t
>
input_right_pads_
;
std
::
array
<
index_t
,
NDimSpatial
>
output_spatial_lengths_
;
std
::
array
<
index_t
,
NDimSpatial
>
filter_spatial_lengths_
;
std
::
array
<
index_t
,
NDimSpatial
>
conv_filter_strides_
;
std
::
array
<
index_t
,
NDimSpatial
>
input_left_pads_
;
std
::
array
<
index_t
,
NDimSpatial
>
input_right_pads_
;
index_t
k_batch_
;
};
...
...
@@ -682,13 +684,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
filter_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
output_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
,
InElementwiseOperation
in_element_op
,
WeiElementwiseOperation
wei_element_op
,
OutElementwiseOperation
out_element_op
,
...
...
@@ -724,13 +726,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
filter_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
output_spatial_lengths
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
,
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
,
InElementwiseOperation
in_element_op
,
WeiElementwiseOperation
wei_element_op
,
OutElementwiseOperation
out_element_op
,
...
...
Prev
1
2
3
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment