gaoqiong / composable_kernel_ROCM · Commits · bf98b476

Commit bf98b476 (unverified), authored Feb 13, 2024 by Bartłomiej Kocot, committed by GitHub on Feb 13, 2024
Add bilinear conv fwd and bwd data instances (#1164)
Parent: a78be3f6

Changes: 63 files. Showing 20 changed files on this page (the diff is paginated, page 1 of 4), with 1093 additions and 71 deletions overall (+1093 −71).
Files on this page of the diff:

- example/62_convnd_activ/unary/convnd_fwd_xdl_clippedrelu_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_elu_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_leakyrelu_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_pow_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_relu_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_sigmoid_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_softrelu_fp16.cpp (+11 −0)
- example/62_convnd_activ/unary/convnd_fwd_xdl_tanh_fp16.cpp (+11 −0)
- include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp (+11 −2)
- library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp (+186 −69)
- library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp (+132 −0)
- library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp (+131 −0)
- library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp (+150 −0)
- library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp (+177 −0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeLists.txt (+6 −0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (+50 −0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (+50 −0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (+50 −0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeLists.txt (+7 −0)
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (+55 −0)
example/62_convnd_activ/unary/convnd_fwd_xdl_clippedrelu_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::ClippedRelu;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_elu_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::Elu;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_leakyrelu_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::LeakyRelu;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_pow_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::Power;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_relu_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::Relu;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_sigmoid_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::Sigmoid;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_softrelu_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::SoftRelu;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
example/62_convnd_activ/unary/convnd_fwd_xdl_tanh_fp16.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_fwd_activ_unary_common.hpp"

using OutElementOp = ck::tensor_operation::element_wise::TanH;

using DeviceGroupedConvNDActivInstance = DeviceGroupedConvNDFwdInstance<OutElementOp>;

#include "../run_convnd_activ_example.inc"

int main(int argc, char* argv[]) { return !run_convnd_example(argc, argv); }
```
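
All eight examples above share one shape: a common header, a single `using OutElementOp = ...;` alias that selects the activation, and a textually included driver. A toy, self-contained illustration of that "configure by alias, then include a shared driver" pattern; every name in it is invented for the sketch and nothing here is CK code:

```cpp
// Toy sketch of the per-activation example pattern (all names invented).
#include <algorithm>
#include <cstdio>

struct Relu
{
    float operator()(float x) const { return std::max(x, 0.f); }
};

// The only line that differs between the real example files:
using OutElementOp = Relu;

// Stand-in for `#include "../run_convnd_activ_example.inc"`; inlined here so
// the sketch is self-contained.
int run_convnd_example_sketch(float x)
{
    OutElementOp op;
    std::printf("activ(%f) = %f\n", x, op(x));
    return 1; // success flag, negated in main exactly as in the real examples
}

int main() { return !run_convnd_example_sketch(-3.f); }
```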
include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp

```diff
@@ -165,7 +165,7 @@ struct Subtract
 struct Bilinear
 {
-    Bilinear(float alpha, float beta) : alpha_(alpha), beta_(beta){};
+    Bilinear(float alpha = 1.f, float beta = 1.f) : alpha_(alpha), beta_(beta){};
 
     template <typename Y, typename X0, typename X1>
     __host__ __device__ constexpr void operator()(Y&, const X0&, const X1&) const;
@@ -184,6 +184,14 @@ struct Bilinear
         y = alpha_ * x0 + beta_ * x1;
     };
 
+    template <>
+    __host__ __device__ constexpr void
+    operator()<int8_t, int8_t, int8_t>(int8_t& y, const int8_t& x0, const int8_t& x1) const
+    {
+        y = type_convert<int8_t>(alpha_ * type_convert<float>(x0) +
+                                 beta_ * type_convert<float>(x1));
+    };
+
     template <>
     __host__ __device__ constexpr void
     operator()<half_t, half_t, half_t>(half_t& y, const half_t& x0, const half_t& x1) const
@@ -221,7 +229,8 @@ struct Bilinear
     __host__ __device__ constexpr void operator()<std::int8_t, std::int32_t, std::int8_t>(
         std::int8_t& y, const std::int32_t& x0, const std::int8_t& x1) const
     {
-        y = type_convert<std::int8_t>(x0 + ck::type_convert<std::int32_t>(x1));
+        y = type_convert<int8_t>(alpha_ * type_convert<float>(x0) +
+                                 beta_ * type_convert<float>(x1));
     };
 
     float alpha_;
```
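
The diff above gives `Bilinear` defaulted coefficients and routes the int8 paths through float math. Its scalar contract is y = alpha_ * x0 + beta_ * x1; a minimal standalone sketch of just that contract, with plain floats and no CK types:

```cpp
// Standalone sketch of the Bilinear combine: y = alpha * x0 + beta * x1,
// with the new default of alpha = beta = 1 (not CK code).
#include <cstdio>

struct BilinearSketch
{
    BilinearSketch(float alpha = 1.f, float beta = 1.f) : alpha_(alpha), beta_(beta) {}
    void operator()(float& y, float x0, float x1) const { y = alpha_ * x0 + beta_ * x1; }
    float alpha_;
    float beta_;
};

int main()
{
    BilinearSketch op{2.f, 0.5f};
    float y = 0.f;
    op(y, 3.f, 4.f);         // y = 2*3 + 0.5*4 = 8
    std::printf("%f\n", y);
}
```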
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp

```diff
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #pragma once
```
```diff
@@ -25,25 +25,35 @@ template <ck::index_t NDimSpatial,
           typename InElementwiseOperation,
           typename WeiElementwiseOperation,
           typename OutElementwiseOperation,
+          ck::index_t NumAElementwiseTensor = 0,
+          ck::index_t NumBElementwiseTensor = 0,
+          ck::index_t NumDElementwiseTensor = 0,
           typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
 struct ReferenceConvBwdData : public device::BaseOperator
 {
     // Argument
     struct Argument : public device::BaseArgument
     {
-        Argument(Tensor<InDataType>& input,
-                 const Tensor<WeiDataType>& weight,
-                 const Tensor<OutDataType>& output,
-                 std::vector<ck::index_t> conv_filter_strides,
-                 std::vector<ck::index_t> conv_filter_dilations,
-                 std::vector<ck::index_t> input_left_pads,
-                 std::vector<ck::index_t> input_right_pads,
-                 InElementwiseOperation in_element_op,
-                 WeiElementwiseOperation wei_element_op,
-                 OutElementwiseOperation out_element_op)
+        Argument(Tensor<InDataType>& input,
+                 const Tensor<WeiDataType>& weight,
+                 const Tensor<OutDataType>& output,
+                 std::vector<ck::index_t> conv_filter_strides,
+                 std::vector<ck::index_t> conv_filter_dilations,
+                 std::vector<ck::index_t> input_left_pads,
+                 std::vector<ck::index_t> input_right_pads,
+                 InElementwiseOperation in_element_op,
+                 WeiElementwiseOperation wei_element_op,
+                 OutElementwiseOperation out_element_op,
+                 const std::array<Tensor<InDataType>, NumAElementwiseTensor>& elementwise_a_tensors,
+                 const std::array<Tensor<WeiDataType>, NumBElementwiseTensor>& elementwise_b_tensors,
+                 const std::array<Tensor<OutDataType>, NumDElementwiseTensor>& elementwise_d_tensors)
             : input_{input},
               weight_{weight},
               output_{output},
+              elementwise_a_tensors_{elementwise_a_tensors},
+              elementwise_b_tensors_{elementwise_b_tensors},
+              elementwise_d_tensors_{elementwise_d_tensors},
               conv_strides_{conv_filter_strides},
               conv_dilations_{conv_filter_dilations},
               in_left_pads_{input_left_pads},
```
```diff
@@ -58,6 +68,10 @@ struct ReferenceConvBwdData : public device::BaseOperator
         const Tensor<WeiDataType>& weight_;
         const Tensor<OutDataType>& output_;
 
+        const std::array<Tensor<InDataType>, NumAElementwiseTensor>& elementwise_a_tensors_;
+        const std::array<Tensor<WeiDataType>, NumBElementwiseTensor>& elementwise_b_tensors_;
+        const std::array<Tensor<OutDataType>, NumDElementwiseTensor>& elementwise_d_tensors_;
+
         std::vector<index_t> conv_strides_;
         std::vector<index_t> conv_dilations_;
         std::vector<index_t> in_left_pads_;
```
```diff
@@ -106,26 +120,46 @@ struct ReferenceConvBwdData : public device::BaseOperator
                 {
                     for(std::size_t k = 0; k < K; ++k)
                     {
-                        float v_out = 0;
-                        float v_wei = 0;
-
-                        arg.out_element_op_(
-                            v_out, ck::type_convert<float>(arg.output_(g, n, k, wo)));
-                        arg.wei_element_op_(
-                            v_wei, ck::type_convert<float>(arg.weight_(g, k, c, x)));
-
-                        v_acc += v_out * v_wei;
+                        OutDataType v_out;
+                        WeiDataType v_wei;
+
+                        ExecuteElementwiseOp(arg.out_element_op_,
+                                             arg.elementwise_a_tensors_,
+                                             Number<NumAElementwiseTensor>{},
+                                             v_out,
+                                             arg.output_(g, n, k, wo),
+                                             g,
+                                             n,
+                                             k,
+                                             wo);
+                        ExecuteElementwiseOp(arg.wei_element_op_,
+                                             arg.elementwise_b_tensors_,
+                                             Number<NumBElementwiseTensor>{},
+                                             v_wei,
+                                             arg.weight_(g, k, c, x),
+                                             g,
+                                             k,
+                                             c,
+                                             x);
+
+                        v_acc += ck::type_convert<float>(v_out) * ck::type_convert<float>(v_wei);
                     }
                 }
             }
         }
 
-            float v_in;
-            arg.in_element_op_(v_in, v_acc);
-            arg.input_(g, n, c, wi) = ck::type_convert<InDataType>(v_in);
+            InDataType v_acc_converted = ck::type_convert<InDataType>(v_acc);
+            InDataType& v_in           = arg.input_(g, n, c, wi);
+            ExecuteElementwiseOp(arg.in_element_op_,
+                                 arg.elementwise_d_tensors_,
+                                 Number<NumDElementwiseTensor>{},
+                                 v_in,
+                                 v_acc_converted,
+                                 g,
+                                 n,
+                                 c,
+                                 wi);
         };
 
         make_ParallelTensorFunctor(f_ncw,
```
```diff
@@ -175,20 +209,34 @@ struct ReferenceConvBwdData : public device::BaseOperator
                     {
                         for(std::size_t k = 0; k < K; ++k)
                         {
-                            float v_out = 0;
-                            float v_wei = 0;
-
-                            arg.out_element_op_(
-                                v_out, ck::type_convert<float>(arg.output_(g, n, k, ho, wo)));
-                            arg.wei_element_op_(
-                                v_wei, ck::type_convert<float>(arg.weight_(g, k, c, y, x)));
-
-                            v_acc += v_out * v_wei;
+                            OutDataType v_out;
+                            WeiDataType v_wei;
+
+                            ExecuteElementwiseOp(arg.out_element_op_,
+                                                 arg.elementwise_a_tensors_,
+                                                 Number<NumAElementwiseTensor>{},
+                                                 v_out,
+                                                 arg.output_(g, n, k, ho, wo),
+                                                 g,
+                                                 n,
+                                                 k,
+                                                 ho,
+                                                 wo);
+                            ExecuteElementwiseOp(arg.wei_element_op_,
+                                                 arg.elementwise_b_tensors_,
+                                                 Number<NumBElementwiseTensor>{},
+                                                 v_wei,
+                                                 arg.weight_(g, k, c, y, x),
+                                                 g,
+                                                 k,
+                                                 c,
+                                                 y,
+                                                 x);
+
+                            v_acc += ck::type_convert<float>(v_out) *
+                                     ck::type_convert<float>(v_wei);
                         }
                     }
                 }
```
```diff
@@ -197,11 +245,18 @@ struct ReferenceConvBwdData : public device::BaseOperator
                 }
             }
 
-            float v_in;
-            arg.in_element_op_(v_in, v_acc);
-            arg.input_(g, n, c, hi, wi) = ck::type_convert<InDataType>(v_in);
+            InDataType v_acc_converted = ck::type_convert<InDataType>(v_acc);
+            InDataType& v_in           = arg.input_(g, n, c, hi, wi);
+            ExecuteElementwiseOp(arg.in_element_op_,
+                                 arg.elementwise_d_tensors_,
+                                 Number<NumDElementwiseTensor>{},
+                                 v_in,
+                                 v_acc_converted,
+                                 g,
+                                 n,
+                                 c,
+                                 hi,
+                                 wi);
         };
 
         make_ParallelTensorFunctor(f_nchw,
```
```diff
@@ -270,20 +325,37 @@ struct ReferenceConvBwdData : public device::BaseOperator
                     {
                         for(std::size_t k = 0; k < K; ++k)
                         {
-                            float v_out = 0;
-                            float v_wei = 0;
-
-                            arg.out_element_op_(
-                                v_out,
-                                ck::type_convert<float>(arg.output_(g, n, k, do_, ho, wo)));
-                            arg.wei_element_op_(
-                                v_wei,
-                                ck::type_convert<float>(arg.weight_(g, k, c, z, y, x)));
-
-                            v_acc += v_out * v_wei;
+                            OutDataType v_out;
+                            WeiDataType v_wei;
+
+                            ExecuteElementwiseOp(arg.out_element_op_,
+                                                 arg.elementwise_a_tensors_,
+                                                 Number<NumAElementwiseTensor>{},
+                                                 v_out,
+                                                 arg.output_(g, n, k, do_, ho, wo),
+                                                 g,
+                                                 n,
+                                                 k,
+                                                 do_,
+                                                 ho,
+                                                 wo);
+                            ExecuteElementwiseOp(arg.wei_element_op_,
+                                                 arg.elementwise_b_tensors_,
+                                                 Number<NumBElementwiseTensor>{},
+                                                 v_wei,
+                                                 arg.weight_(g, k, c, z, y, x),
+                                                 g,
+                                                 k,
+                                                 c,
+                                                 z,
+                                                 y,
+                                                 x);
+
+                            v_acc += ck::type_convert<float>(v_out) *
+                                     ck::type_convert<float>(v_wei);
                         }
                     }
                 }
```
```diff
@@ -295,11 +367,19 @@ struct ReferenceConvBwdData : public device::BaseOperator
                 }
             }
 
-            float v_in;
-            arg.in_element_op_(v_in, v_acc);
-            arg.input_(g, n, c, di, hi, wi) = ck::type_convert<InDataType>(v_in);
+            InDataType v_acc_converted = ck::type_convert<InDataType>(v_acc);
+            InDataType& v_in           = arg.input_(g, n, c, di, hi, wi);
+            ExecuteElementwiseOp(arg.in_element_op_,
+                                 arg.elementwise_d_tensors_,
+                                 Number<NumDElementwiseTensor>{},
+                                 v_in,
+                                 v_acc_converted,
+                                 g,
+                                 n,
+                                 c,
+                                 di,
+                                 hi,
+                                 wi);
         };
 
         make_ParallelTensorFunctor(f_ncdhw,
```
```diff
@@ -325,6 +405,36 @@ struct ReferenceConvBwdData : public device::BaseOperator
         }
     };
 
+    template <typename... Args,
+              typename ElementwiseOp,
+              typename ElementwiseTensor,
+              typename NumTensor,
+              typename T>
+    static void ExecuteElementwiseOp(ElementwiseOp& elementwise_op,
+                                     ElementwiseTensor& elementwise_tensors,
+                                     NumTensor,
+                                     T& y,
+                                     const T& x,
+                                     Args... dims)
+    {
+        if constexpr(NumTensor::value == 0)
+        {
+            elementwise_op(y, x);
+        }
+        else if constexpr(NumTensor::value == 1)
+        {
+            elementwise_op(y, x, elementwise_tensors[0](dims...));
+        }
+        else if constexpr(NumTensor::value == 2)
+        {
+            elementwise_op(y, x, elementwise_tensors[0](dims...), elementwise_tensors[1](dims...));
+        }
+        else
+        {
+            throw std::runtime_error("ElementOp not supported in reference.");
+        }
+    }
+
     static constexpr bool IsValidCompilationParameter()
     {
         // TODO: properly implement this check
```
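
`ExecuteElementwiseOp` dispatches at compile time on `Number<N>` so the same call site serves 0, 1, or 2 extra elementwise tensors (Bilinear consumes one). A self-contained toy of that `if constexpr` dispatch, with invented stand-ins for the CK types:

```cpp
// Toy of the Number<N> if-constexpr dispatch (all types invented, not CK code).
#include <array>
#include <cstdio>
#include <stdexcept>

template <int N>
struct Number { static constexpr int value = N; };

// Toy "tensor": callable that returns a stored value regardless of indices.
struct ToyTensor
{
    float v;
    template <typename... Dims>
    float operator()(Dims...) const { return v; }
};

// Bilinear-like op: with one extra tensor it does y = x + d0.
struct AddExtra
{
    void operator()(float& y, float x) const { y = x; }
    void operator()(float& y, float x, float d0) const { y = x + d0; }
};

template <typename Op, typename Tensors, typename NumTensor, typename... Args>
void ExecuteElementwiseOpSketch(Op& op, Tensors& ts, NumTensor, float& y, float x, Args... dims)
{
    if constexpr(NumTensor::value == 0) { op(y, x); }
    else if constexpr(NumTensor::value == 1) { op(y, x, ts[0](dims...)); }
    else { throw std::runtime_error("not supported in sketch"); }
}

int main()
{
    AddExtra op;
    std::array<ToyTensor, 1> ts{{{10.f}}};
    float y = 0.f;
    ExecuteElementwiseOpSketch(op, ts, Number<1>{}, y, 5.f, 0, 0); // y = 5 + 10
    std::printf("%f\n", y);
}
```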
```diff
@@ -333,16 +443,20 @@ struct ReferenceConvBwdData : public device::BaseOperator
     bool IsSupportedArgument(const device::BaseArgument*) override { return true; }
 
-    static auto MakeArgument(Tensor<InDataType>& input,
-                             const Tensor<WeiDataType>& weight,
-                             const Tensor<OutDataType>& output,
-                             std::vector<ck::index_t> conv_filter_strides,
-                             std::vector<ck::index_t> conv_filter_dilations,
-                             std::vector<ck::index_t> input_left_pads,
-                             std::vector<ck::index_t> input_right_pads,
-                             InElementwiseOperation in_element_op,
-                             WeiElementwiseOperation wei_element_op,
-                             OutElementwiseOperation out_element_op)
+    static auto MakeArgument(
+        Tensor<InDataType>& input,
+        const Tensor<WeiDataType>& weight,
+        const Tensor<OutDataType>& output,
+        std::vector<ck::index_t> conv_filter_strides,
+        std::vector<ck::index_t> conv_filter_dilations,
+        std::vector<ck::index_t> input_left_pads,
+        std::vector<ck::index_t> input_right_pads,
+        InElementwiseOperation in_element_op,
+        WeiElementwiseOperation wei_element_op,
+        OutElementwiseOperation out_element_op,
+        const std::array<Tensor<InDataType>, NumAElementwiseTensor>& elementwise_a_tensors = {},
+        const std::array<Tensor<WeiDataType>, NumBElementwiseTensor>& elementwise_b_tensors = {},
+        const std::array<Tensor<OutDataType>, NumDElementwiseTensor>& elementwise_d_tensors = {})
     {
         return Argument{input,
                         weight,
```
```diff
@@ -353,7 +467,10 @@ struct ReferenceConvBwdData : public device::BaseOperator
                         input_right_pads,
                         in_element_op,
                         wei_element_op,
-                        out_element_op};
+                        out_element_op,
+                        elementwise_a_tensors,
+                        elementwise_b_tensors,
+                        elementwise_d_tensors};
     }
 
     static auto MakeInvoker() { return Invoker{}; }
```
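
For intuition about what this reference computes: backward data scatters each output-gradient element through the filter, so in[n, c, wi] accumulates out[n, k, wo] * wei[k, c, x] over every (wo, x) pair that maps onto wi. A toy 1D version with stride 1, dilation 1, no padding, and a single group (plain C++, no CK types):

```cpp
// Toy 1D bwd-data convolution mirroring the reference loop structure:
// in[n,c,wi] = sum over (k, wo, x) with wo + x == wi of out[n,k,wo] * wei[k,c,x].
#include <cstdio>
#include <vector>

int main()
{
    const int N = 1, K = 2, C = 2, Wo = 3, X = 2, Wi = Wo + X - 1;
    std::vector<float> out(N * K * Wo, 1.f), wei(K * C * X, 1.f), in(N * C * Wi, 0.f);

    for(int n = 0; n < N; ++n)
        for(int c = 0; c < C; ++c)
            for(int wi = 0; wi < Wi; ++wi)
            {
                float v_acc = 0.f;
                for(int wo = 0; wo < Wo; ++wo)
                    for(int x = 0; x < X; ++x)
                        if(wo + x == wi) // stride 1, dilation 1, pad 0
                            for(int k = 0; k < K; ++k)
                                v_acc += out[(n * K + k) * Wo + wo] * wei[(k * C + c) * X + x];
                in[(n * C + c) * Wi + wi] = v_acc;
            }

    for(int wi = 0; wi < Wi; ++wi)
        std::printf("in[0,0,%d] = %f\n", wi, in[wi]);
}
```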
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp (new file, mode 100644)

This diff is collapsed on the page and its contents are not shown (+132 −0).
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp (new file, mode 100644)

This diff is collapsed on the page and its contents are not shown (+131 −0).
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

#ifdef CK_ENABLE_FP16
void add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_f16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  NDHWGK,
                                                                  GKZYXC,
                                                                  Tuple<NDHWGC>,
                                                                  NDHWGC,
                                                                  F16,
                                                                  F16,
                                                                  Tuple<F16>,
                                                                  F16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  Bilinear>>>& instances);
#endif
#ifdef CK_ENABLE_FP32
void add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  NDHWGK,
                                                                  GKZYXC,
                                                                  Tuple<NDHWGC>,
                                                                  NDHWGC,
                                                                  F32,
                                                                  F32,
                                                                  Tuple<F32>,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  Bilinear>>>& instances);
#endif
#ifdef CK_ENABLE_BF16
void add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  NDHWGK,
                                                                  GKZYXC,
                                                                  Tuple<NDHWGC>,
                                                                  NDHWGC,
                                                                  BF16,
                                                                  BF16,
                                                                  Tuple<BF16>,
                                                                  BF16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  Bilinear>>>& instances);
#endif

template <ck::index_t NumDimSpatial,
          typename OutLayout,
          typename WeiLayout,
          typename InLayout,
          typename OutDataType,
          typename WeiDataType,
          typename InDataType,
          typename ComputeTypeA,
          typename ComputeTypeB>
struct DeviceOperationInstanceFactory<
    ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD<
        NumDimSpatial,
        OutLayout,
        WeiLayout,
        Tuple<InLayout>,
        InLayout,
        OutDataType,
        WeiDataType,
        Tuple<InDataType>,
        InDataType,
        ck::tensor_operation::element_wise::PassThrough,
        ck::tensor_operation::element_wise::PassThrough,
        ck::tensor_operation::element_wise::Bilinear,
        ComputeTypeA,
        ComputeTypeB>>
{
    using DeviceOp =
        DeviceGroupedConvBwdDataMultipleD<NumDimSpatial,
                                          OutLayout,
                                          WeiLayout,
                                          Tuple<InLayout>,
                                          InLayout,
                                          OutDataType,
                                          WeiDataType,
                                          Tuple<InDataType>,
                                          InDataType,
                                          ck::tensor_operation::element_wise::PassThrough,
                                          ck::tensor_operation::element_wise::PassThrough,
                                          ck::tensor_operation::element_wise::Bilinear,
                                          ComputeTypeA,
                                          ComputeTypeB>;

    static auto GetInstances()
    {
        std::vector<std::unique_ptr<DeviceOp>> op_ptrs;

        if constexpr(NumDimSpatial == 3)
        {
            if constexpr(is_same_v<InLayout, NDHWGC> && is_same_v<WeiLayout, GKZYXC> &&
                         is_same_v<OutLayout, NDHWGK>)
            {
#ifdef CK_ENABLE_FP16
                if constexpr(is_same_v<InDataType, F16> && is_same_v<WeiDataType, F16> &&
                             is_same_v<OutDataType, F16> && is_same_v<ComputeTypeA, F16> &&
                             is_same_v<ComputeTypeB, F16>)
                {
                    add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_f16_instances(
                        op_ptrs);
                }
#endif
#ifdef CK_ENABLE_FP32
                else if constexpr(is_same_v<InDataType, F32> && is_same_v<WeiDataType, F32> &&
                                  is_same_v<OutDataType, F32> && is_same_v<ComputeTypeA, F32> &&
                                  is_same_v<ComputeTypeB, F32>)
                {
                    add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_f32_instances(
                        op_ptrs);
                }
#endif
#ifdef CK_ENABLE_BF16
                else if constexpr(is_same_v<InDataType, BF16> && is_same_v<WeiDataType, BF16> &&
                                  is_same_v<OutDataType, BF16> && is_same_v<ComputeTypeA, BF16> &&
                                  is_same_v<ComputeTypeB, BF16>)
                {
                    add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_bf16_instances(
                        op_ptrs);
                }
#endif
            }
        }

        return op_ptrs;
    }
};

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
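
A hedged usage sketch for this factory, not a verified snippet from the commit: it assumes, as the header's own declarations imply, that the layout and data-type aliases (NDHWGK, F16, ...) and the PassThrough/Bilinear aliases are visible in the instance namespace, that the two trailing compute-type parameters of `DeviceGroupedConvBwdDataMultipleD` may be spelled explicitly, and that `BaseOperator` exposes `GetTypeString()`:

```cpp
#include <iostream>

#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp"

int main()
{
    using namespace ck::tensor_operation::device;           // DeviceGroupedConvBwdDataMultipleD
    using namespace ck::tensor_operation::device::instance; // aliases + factory (assumed visible)

    // in_grad = bilinear(conv_bwd_data(out_grad, wei), d0), all 3D NDHWG* f16.
    using DeviceOp =
        DeviceGroupedConvBwdDataMultipleD<3,
                                          NDHWGK,            // out_grad layout (A)
                                          GKZYXC,            // weight layout (B)
                                          ck::Tuple<NDHWGC>, // D0: residual read by Bilinear
                                          NDHWGC,            // in_grad layout (E)
                                          F16, F16, ck::Tuple<F16>, F16,
                                          PassThrough, PassThrough, Bilinear,
                                          F16, F16>;         // ComputeTypeA/B, spelled out

    auto op_ptrs = DeviceOperationInstanceFactory<DeviceOp>::GetInstances();
    std::cout << "bwd-data bilinear f16 instances: " << op_ptrs.size() << '\n';
    for(const auto& op : op_ptrs)
        std::cout << op->GetTypeString() << '\n'; // assumes BaseOperator::GetTypeString()
}
```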
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>
#include <memory>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using Bilinear    = ck::tensor_operation::element_wise::Bilinear;

#ifdef CK_ENABLE_BF16
// grouped conv3d forward, NDHWGC/GKZYXC/NDHWGK
void add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<NDHWGK>,
                                                                NDHWGK,
                                                                BF16,
                                                                BF16,
                                                                ck::Tuple<BF16>,
                                                                BF16,
                                                                PassThrough,
                                                                PassThrough,
                                                                Bilinear>>>& instances);
#endif
#ifdef CK_ENABLE_FP16
void add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<NDHWGK>,
                                                                NDHWGK,
                                                                F16,
                                                                F16,
                                                                ck::Tuple<F16>,
                                                                F16,
                                                                PassThrough,
                                                                PassThrough,
                                                                Bilinear>>>& instances);
#endif
#ifdef CK_ENABLE_FP32
void add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<NDHWGK>,
                                                                NDHWGK,
                                                                F32,
                                                                F32,
                                                                ck::Tuple<F32>,
                                                                F32,
                                                                PassThrough,
                                                                PassThrough,
                                                                Bilinear>>>& instances);
#endif
#ifdef CK_ENABLE_INT8
void add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<NDHWGK>,
                                                                NDHWGK,
                                                                int8_t,
                                                                int8_t,
                                                                ck::Tuple<int8_t>,
                                                                int8_t,
                                                                PassThrough,
                                                                PassThrough,
                                                                Bilinear>>>& instances);
#endif

template <ck::index_t NumDimSpatial,
          typename InLayout,
          typename WeiLayout,
          typename DLayouts,
          typename OutLayout,
          typename InDataType,
          typename WeiDataType,
          typename DDataTypes,
          typename OutDataType,
          typename ComputeType>
struct DeviceOperationInstanceFactory<
    ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<
        NumDimSpatial,
        InLayout,
        WeiLayout,
        DLayouts,
        OutLayout,
        InDataType,
        WeiDataType,
        DDataTypes,
        OutDataType,
        ck::tensor_operation::element_wise::PassThrough,
        ck::tensor_operation::element_wise::PassThrough,
        ck::tensor_operation::element_wise::Bilinear,
        ComputeType>>
{
    using DeviceOp = DeviceGroupedConvFwdMultipleABD<NumDimSpatial,
                                                     InLayout,
                                                     WeiLayout,
                                                     DLayouts,
                                                     OutLayout,
                                                     InDataType,
                                                     WeiDataType,
                                                     DDataTypes,
                                                     OutDataType,
                                                     ck::tensor_operation::element_wise::PassThrough,
                                                     ck::tensor_operation::element_wise::PassThrough,
                                                     ck::tensor_operation::element_wise::Bilinear,
                                                     ComputeType>;

    static auto GetInstances()
    {
        std::vector<std::unique_ptr<DeviceOp>> op_ptrs;

        if constexpr(NumDimSpatial == 3 && is_same_v<InLayout, NDHWGC> &&
                     is_same_v<WeiLayout, GKZYXC> && is_same_v<OutLayout, NDHWGK> &&
                     DLayouts::Size() == 1 && is_same_v<tuple_element_t<0, DLayouts>, NDHWGK>)
        {
#ifdef CK_ENABLE_FP32
            if constexpr(is_same_v<InDataType, float> && is_same_v<WeiDataType, float> &&
                         is_same_v<OutDataType, float>)
            {
                add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instances(
                    op_ptrs);
            }
#endif
#ifdef CK_ENABLE_FP16
            if constexpr(is_same_v<InDataType, half_t> && is_same_v<WeiDataType, half_t> &&
                         is_same_v<OutDataType, half_t> && is_same_v<ComputeType, half_t>)
            {
                add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instances(
                    op_ptrs);
            }
#endif
#ifdef CK_ENABLE_BF16
            if constexpr(is_same_v<InDataType, ck::bhalf_t> && is_same_v<WeiDataType, ck::bhalf_t> &&
                         is_same_v<OutDataType, ck::bhalf_t>)
            {
                add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
                    op_ptrs);
            }
#endif
#ifdef CK_ENABLE_INT8
            if constexpr(is_same_v<InDataType, int8_t> && is_same_v<WeiDataType, int8_t> &&
                         is_same_v<OutDataType, int8_t>)
            {
                add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instances(
                    op_ptrs);
            }
#endif
        }

        return op_ptrs;
    }
};

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
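
The forward factory mirrors the backward-data one, with a single ComputeType and the D-tensor sharing the output layout. A correspondingly hedged sketch, under the same assumptions as the backward-data example above:

```cpp
#include <iostream>

#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp"

int main()
{
    using namespace ck::tensor_operation::device;           // DeviceGroupedConvFwdMultipleABD
    using namespace ck::tensor_operation::device::instance; // aliases + factory (assumed visible)

    // E = bilinear(conv(A, B), D0): D0 shares the output layout NDHWGK.
    using DeviceOp = DeviceGroupedConvFwdMultipleABD<3,
                                                     NDHWGC, GKZYXC, ck::Tuple<NDHWGK>, NDHWGK,
                                                     F16, F16, ck::Tuple<F16>, F16,
                                                     PassThrough, PassThrough, Bilinear,
                                                     F16>; // ComputeType, explicit

    auto op_ptrs = DeviceOperationInstanceFactory<DeviceOp>::GetInstances();
    std::cout << "fwd bilinear f16 instances: " << op_ptrs.size() << '\n';
}
```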
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeLists.txt (new file, mode 100644)

```cmake
set(GROUPED_CONV3D_BWD_DATA_BILINEAR
    xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
    xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
    xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp)

add_instance_library(device_grouped_conv3d_bwd_data_bilinear_instance
                     ${GROUPED_CONV3D_BWD_DATA_BILINEAR})
```
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for out[n, do, ho, wo, g, k] * wei[g, k, z, y, x, c] = in[n, di, hi, wi,
// g, c]
void add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  NDHWGK,
                                                                  GKZYXC,
                                                                  Tuple<NDHWGC>,
                                                                  NDHWGC,
                                                                  BF16,
                                                                  BF16,
                                                                  Tuple<BF16>,
                                                                  BF16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  Bilinear>>>& instances)
{
    // 1. Default
    add_device_operation_instances(
        instances,
        device_grouped_conv_bwd_data_xdl_bilinear_bf16_instances<3,
                                                                 NDHWGK,
                                                                 GKZYXC,
                                                                 Tuple<NDHWGC>,
                                                                 NDHWGC,
                                                                 ConvBwdDataDefault>{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(
        instances,
        device_grouped_conv_bwd_data_xdl_bilinear_bf16_instances<3,
                                                                 NDHWGK,
                                                                 GKZYXC,
                                                                 Tuple<NDHWGC>,
                                                                 NDHWGC,
                                                                 ConvBwdDataFilter1x1Stride1Pad0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
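
`add_device_operation_instances` is CK's registration helper; this commit only calls it. As an assumption-labeled toy (not CK's actual implementation), a helper of that style plausibly expands a tuple of concrete instance types and appends one owning pointer per type:

```cpp
// Toy sketch of an add_device_operation_instances-style helper (invented names).
#include <iostream>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

struct BaseOp
{
    virtual ~BaseOp()                = default;
    virtual std::string name() const = 0;
};

template <int Id>
struct KernelInstance : BaseOp
{
    std::string name() const override { return "kernel_" + std::to_string(Id); }
};

// Expand the tuple's type list; construct and register one instance per type.
template <typename Base, typename... Ops>
void add_instances_sketch(std::vector<std::unique_ptr<Base>>& instances, std::tuple<Ops...>)
{
    (instances.push_back(std::make_unique<Ops>()), ...);
}

int main()
{
    std::vector<std::unique_ptr<BaseOp>> ops;
    add_instances_sketch(ops, std::tuple<KernelInstance<0>, KernelInstance<1>>{});
    for(const auto& op : ops)
        std::cout << op->name() << '\n';
}
```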
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for out[n, do, ho, wo, g, k] * wei[g, k, z, y, x, c] = in[n, di, hi, wi,
// g, c]
void add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_f16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  NDHWGK,
                                                                  GKZYXC,
                                                                  Tuple<NDHWGC>,
                                                                  NDHWGC,
                                                                  F16,
                                                                  F16,
                                                                  Tuple<F16>,
                                                                  F16,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  Bilinear>>>& instances)
{
    // 1. Default
    add_device_operation_instances(
        instances,
        device_grouped_conv_bwd_data_xdl_bilinear_f16_instances<3,
                                                                NDHWGK,
                                                                GKZYXC,
                                                                Tuple<NDHWGC>,
                                                                NDHWGC,
                                                                ConvBwdDataDefault>{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(
        instances,
        device_grouped_conv_bwd_data_xdl_bilinear_f16_instances<3,
                                                                NDHWGK,
                                                                GKZYXC,
                                                                Tuple<NDHWGC>,
                                                                NDHWGC,
                                                                ConvBwdDataFilter1x1Stride1Pad0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for out[n, do, ho, wo, g, k] * wei[g, k, z, y, x, c] = in[n, di, hi, wi,
// g, c]
void add_device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgk_gkzyxc_ndhwgc_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdDataMultipleD<3,
                                                                  NDHWGK,
                                                                  GKZYXC,
                                                                  Tuple<NDHWGC>,
                                                                  NDHWGC,
                                                                  F32,
                                                                  F32,
                                                                  Tuple<F32>,
                                                                  F32,
                                                                  PassThrough,
                                                                  PassThrough,
                                                                  Bilinear>>>& instances)
{
    // 1. Default
    add_device_operation_instances(
        instances,
        device_grouped_conv_bwd_data_xdl_bilinear_f32_instances<3,
                                                                NDHWGK,
                                                                GKZYXC,
                                                                Tuple<NDHWGC>,
                                                                NDHWGC,
                                                                ConvBwdDataDefault>{});
    // 2. Filter1x1Stride1Pad0
    add_device_operation_instances(
        instances,
        device_grouped_conv_bwd_data_xdl_bilinear_f32_instances<3,
                                                                NDHWGK,
                                                                GKZYXC,
                                                                Tuple<NDHWGC>,
                                                                NDHWGC,
                                                                ConvBwdDataFilter1x1Stride1Pad0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeLists.txt (new file, mode 100644)

```cmake
set(GROUPED_CONV3D_FWD_BILINEAR
    xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp)

add_instance_library(device_grouped_conv3d_fwd_bilinear_instance
                     ${GROUPED_CONV3D_FWD_BILINEAR})
```
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (new file, mode 100644)

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<NDHWGK>,
                                                                NDHWGK,
                                                                BF16,
                                                                BF16,
                                                                ck::Tuple<BF16>,
                                                                BF16,
                                                                PassThrough,
                                                                PassThrough,
                                                                Bilinear>>>& instances)
{
    add_device_operation_instances(instances,
                                   device_grouped_conv_fwd_xdl_bilinear_bf16_instances<3,
                                                                                       NDHWGC,
                                                                                       GKZYXC,
                                                                                       Tuple<NDHWGK>,
                                                                                       NDHWGK,
                                                                                       ConvFwdDefault>{});
    add_device_operation_instances(instances,
                                   device_grouped_conv_fwd_xdl_bilinear_bf16_instances<3,
                                                                                       NDHWGC,
                                                                                       GKZYXC,
                                                                                       Tuple<NDHWGK>,
                                                                                       NDHWGK,
                                                                                       ConvFwd1x1P0>{});
    add_device_operation_instances(instances,
                                   device_grouped_conv_fwd_xdl_bilinear_bf16_instances<3,
                                                                                       NDHWGC,
                                                                                       GKZYXC,
                                                                                       Tuple<NDHWGK>,
                                                                                       NDHWGK,
                                                                                       ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
```