Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c3738ce3
Commit
c3738ce3
authored
Oct 30, 2023
by
Bartlomiej Kocot
Browse files
Fixes
parent
eb898ad6
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
158 additions
and
151 deletions
+158
-151
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt
..._grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt
+11
-0
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc
...scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc
+6
-5
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp
...add_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp
+4
-2
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp
...add_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp
+4
-2
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp
...add_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp
+4
-2
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp
...add_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp
+4
-2
client_example/23_grouped_convnd_fwd_scaleaddx2_relu/CMakeLists.txt
...mple/23_grouped_convnd_fwd_scaleaddx2_relu/CMakeLists.txt
+0
-11
example/62_conv_fwd_activ/CMakeLists.txt
example/62_conv_fwd_activ/CMakeLists.txt
+2
-2
example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp
example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp
+2
-3
example/62_conv_fwd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
..._fwd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
+4
-5
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
...ary/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+17
-18
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleaddx2_relu_instance.hpp
.../device_grouped_conv_fwd_xdl_scaleaddx2_relu_instance.hpp
+4
-4
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_scaleadd_relu.hpp
...pu/grouped_convolution_forward_scaleadd_scaleadd_relu.hpp
+9
-8
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt
.../grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt
+7
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
...eadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
+20
-20
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
...leadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
+20
-20
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
...leadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
+20
-20
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
...eadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
+20
-20
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleaddx2_relu/CMakeLists.txt
...nce/gpu/grouped_conv3d_fwd_scaleaddx2_relu/CMakeLists.txt
+0
-7
No files found.
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt
0 → 100644
View file @
c3738ce3
add_executable
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 PRIVATE composable_kernel::device_operations
)
client_example/23_grouped_convnd_fwd_scaleadd
x2
_relu/grouped_conv_fwd_scaleadd
x2
_relu.inc
→
client_example/23_grouped_convnd_fwd_scaleadd
_scaleadd
_relu/grouped_conv_fwd_scaleadd
_scaleadd
_relu.inc
View file @
c3738ce3
...
...
@@ -9,7 +9,7 @@
#include <vector>
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd
x2
_relu.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd
_scaleadd
_relu.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
...
...
@@ -50,11 +50,11 @@ struct SimpleDeviceMem
void
*
p_mem_
;
};
int
main
()
int
execute_conv_fwd_scaleadd_scaleadd_relu
()
{
// We have NHWGC/GKYXC/NHWGK (x, weight, y) in memory space
// However, CK's API only accept length and stride with order of GNCDHW/GKCZYX/GNKDHW
// Hence, we need to adjust the order of stride
// We have NHWGC/GKYXC/NHWGK (x, weight, y) in memory space
.
// However, CK's API only accept
s
length
s
and stride
s
with order of GNCDHW/GKCZYX/GNKDHW
.
// Hence, we need to adjust the order of stride
s.
std
::
array
<
ck
::
index_t
,
6
>
in_lengths
{
G
,
N
,
C
,
Di
,
Hi
,
Wi
};
std
::
array
<
ck
::
index_t
,
6
>
in_strides
{
C
,
Di
*
Hi
*
Wi
*
G
*
C
,
1
,
Hi
*
Wi
*
G
*
C
,
Wi
*
G
*
C
,
G
*
C
};
...
...
@@ -208,4 +208,5 @@ int main()
std
::
cout
<<
"Done"
<<
std
::
endl
;
}
return
0
;
}
client_example/23_grouped_convnd_fwd_scaleadd
x2
_relu/grouped_conv_fwd_scaleadd
x2
_relu_bf16.cpp
→
client_example/23_grouped_convnd_fwd_scaleadd
_scaleadd
_relu/grouped_conv_fwd_scaleadd
_scaleadd
_relu_bf16.cpp
View file @
c3738ce3
...
...
@@ -8,8 +8,10 @@
using
InDataType
=
ck
::
bhalf_t
;
using
WeiDataType
=
ck
::
bhalf_t
;
using
OutDataType
=
ck
::
bhalf_t
;
// Use std tuple instead ck tuple to avoid clang
// Use std tuple instead
of
ck tuple to avoid clang
// implicit instantiation of undefined template error.
using
DDataTypes
=
std
::
tuple
<
ck
::
bhalf_t
,
ck
::
bhalf_t
>
;
#include "grouped_conv_fwd_scaleaddx2_relu.inc"
#include "grouped_conv_fwd_scaleadd_scaleadd_relu.inc"
int
main
()
{
return
execute_conv_fwd_scaleadd_scaleadd_relu
();
}
client_example/23_grouped_convnd_fwd_scaleadd
x2
_relu/grouped_conv_fwd_scaleadd
x2
_relu_fp16.cpp
→
client_example/23_grouped_convnd_fwd_scaleadd
_scaleadd
_relu/grouped_conv_fwd_scaleadd
_scaleadd
_relu_fp16.cpp
View file @
c3738ce3
...
...
@@ -8,8 +8,10 @@
using
InDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
// Use std tuple instead ck tuple to avoid clang
// Use std tuple instead
of
ck tuple to avoid clang
// implicit instantiation of undefined template error.
using
DDataTypes
=
std
::
tuple
<
ck
::
half_t
,
ck
::
half_t
>
;
#include "grouped_conv_fwd_scaleaddx2_relu.inc"
#include "grouped_conv_fwd_scaleadd_scaleadd_relu.inc"
int
main
()
{
return
execute_conv_fwd_scaleadd_scaleadd_relu
();
}
client_example/23_grouped_convnd_fwd_scaleadd
x2
_relu/grouped_conv_fwd_scaleadd
x2
_relu_fp32.cpp
→
client_example/23_grouped_convnd_fwd_scaleadd
_scaleadd
_relu/grouped_conv_fwd_scaleadd
_scaleadd
_relu_fp32.cpp
View file @
c3738ce3
...
...
@@ -8,8 +8,10 @@
using
InDataType
=
float
;
using
WeiDataType
=
float
;
using
OutDataType
=
float
;
// Use std tuple instead ck tuple to avoid clang
// Use std tuple instead
of
ck tuple to avoid clang
// implicit instantiation of undefined template error.
using
DDataTypes
=
std
::
tuple
<
float
,
float
>
;
#include "grouped_conv_fwd_scaleaddx2_relu.inc"
#include "grouped_conv_fwd_scaleadd_scaleadd_relu.inc"
int
main
()
{
return
execute_conv_fwd_scaleadd_scaleadd_relu
();
}
client_example/23_grouped_convnd_fwd_scaleadd
x2
_relu/grouped_conv_fwd_scaleadd
x2
_relu_int8.cpp
→
client_example/23_grouped_convnd_fwd_scaleadd
_scaleadd
_relu/grouped_conv_fwd_scaleadd
_scaleadd
_relu_int8.cpp
View file @
c3738ce3
...
...
@@ -8,8 +8,10 @@
using
InDataType
=
int8_t
;
using
WeiDataType
=
int8_t
;
using
OutDataType
=
int8_t
;
// Use std tuple instead ck tuple to avoid clang
// Use std tuple instead
of
ck tuple to avoid clang
// implicit instantiation of undefined template error.
using
DDataTypes
=
std
::
tuple
<
float
,
float
>
;
#include "grouped_conv_fwd_scaleaddx2_relu.inc"
#include "grouped_conv_fwd_scaleadd_scaleadd_relu.inc"
int
main
()
{
return
execute_conv_fwd_scaleadd_scaleadd_relu
();
}
client_example/23_grouped_convnd_fwd_scaleaddx2_relu/CMakeLists.txt
deleted
100644 → 0
View file @
eb898ad6
add_executable
(
client_grouped_convnd_fwd_scaleaddx2_relu_fp32 grouped_conv_fwd_scaleaddx2_relu_fp32.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleaddx2_relu_fp32 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_grouped_convnd_fwd_scaleaddx2_relu_fp16 grouped_conv_fwd_scaleaddx2_relu_fp16.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleaddx2_relu_fp16 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_grouped_convnd_fwd_scaleaddx2_relu_bf16 grouped_conv_fwd_scaleaddx2_relu_bf16.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleaddx2_relu_bf16 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_grouped_convnd_fwd_scaleaddx2_relu_int8 grouped_conv_fwd_scaleaddx2_relu_int8.cpp
)
target_link_libraries
(
client_grouped_convnd_fwd_scaleaddx2_relu_int8 PRIVATE composable_kernel::device_operations
)
example/62_conv_fwd_activ/CMakeLists.txt
View file @
c3738ce3
...
...
@@ -31,8 +31,8 @@ foreach(gpu IN LISTS GPU_TARGETS)
add_example_executable
(
example_convnd_fwd_xdl_elu_fp16 convnd_fwd_xdl_elu_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_convnd_fwd_xdl_elu_fp16
)
# ScaleAdd ScaleAdd Relu
add_example_executable
(
example_convnd_fwd_xdl_scaleadd
x2
_relu_fp16 convnd_fwd_xdl_scaleadd
x2
_relu_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_convnd_fwd_xdl_scaleadd
x2
_relu_fp16
)
add_example_executable
(
example_convnd_fwd_xdl_scaleadd
_scaleadd
_relu_fp16 convnd_fwd_xdl_scaleadd
_scaleadd
_relu_fp16.cpp
)
add_example_dependencies
(
example_convnd_fwd_activ_xdl example_convnd_fwd_xdl_scaleadd
_scaleadd
_relu_fp16
)
set
(
target 1
)
endif
()
endforeach
()
example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp
View file @
c3738ce3
...
...
@@ -190,9 +190,8 @@ bool run_grouped_conv_fwd(bool do_verification,
if
(
!
conv
.
IsSupportedArgument
(
argument
))
{
throw
std
::
runtime_error
(
"wrong! device_conv with the specified compilation parameters does "
"not support this Conv problem"
);
throw
std
::
runtime_error
(
"The device op with the specified compilation parameters does "
"not support this convolution problem."
);
}
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
...
...
example/62_conv_fwd_activ/convnd_fwd_xdl_scaleadd
x2
_relu_fp16.cpp
→
example/62_conv_fwd_activ/convnd_fwd_xdl_scaleadd
_scaleadd
_relu_fp16.cpp
View file @
c3738ce3
...
...
@@ -97,7 +97,7 @@ using DeviceGroupedConvNDFwdInstance =
using
DeviceGroupedConvNDFwdActivInstance
=
DeviceGroupedConvNDFwdInstance
<
OutElementOp
>
;
namespace
{
// Use
own
implementation to pass two more tensors for post op
// Use
custom
implementation to pass two more tensors for post op
template
<
ck
::
index_t
NDimSpatial
,
typename
InDataType
,
typename
WeiDataType
,
...
...
@@ -181,7 +181,7 @@ bool run_grouped_conv_fwd(bool do_verification,
copy
(
conv_param
.
input_right_pads_
,
input_right_pads
);
const
std
::
array
<
const
void
*
,
NumDs
>
ds
=
{
d0_buf
.
GetDeviceBuffer
(),
d1_buf
.
GetDeviceBuffer
()};
// do Conv
auto
conv
=
DeviceConvNDFwdInstance
{};
auto
invoker
=
conv
.
MakeInvoker
();
auto
argument
=
conv
.
MakeArgument
(
in_device_buf
.
GetDeviceBuffer
(),
...
...
@@ -208,9 +208,8 @@ bool run_grouped_conv_fwd(bool do_verification,
if
(
!
conv
.
IsSupportedArgument
(
argument
))
{
throw
std
::
runtime_error
(
"wrong! device_conv with the specified compilation parameters does "
"not support this Conv problem"
);
throw
std
::
runtime_error
(
"The device op with the specified compilation parameters does "
"not support this convolution problem."
);
}
float
avg_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
});
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
View file @
c3738ce3
...
...
@@ -59,11 +59,11 @@ struct ReferenceConvFwd : public device::BaseOperator
InElementwiseOperation
in_element_op
,
WeiElementwiseOperation
wei_element_op
,
OutElementwiseOperation
out_element_op
,
const
std
::
array
<
Tensor
<
OutDataType
>
,
NumDTensor
>&
postop
_tensors
)
const
std
::
array
<
Tensor
<
OutDataType
>
,
NumDTensor
>&
d
_tensors
)
:
input_
{
input
},
weight_
{
weight
},
output_
{
output
},
postop
_tensors_
{
postop
_tensors
},
d
_tensors_
{
d
_tensors
},
conv_strides_
{
conv_filter_strides
},
conv_dilations_
{
conv_filter_dilations
},
in_left_pads_
{
input_left_pads
},
...
...
@@ -78,7 +78,7 @@ struct ReferenceConvFwd : public device::BaseOperator
const
Tensor
<
WeiDataType
>&
weight_
;
Tensor
<
OutDataType
>&
output_
;
const
std
::
array
<
Tensor
<
OutDataType
>
,
NumDTensor
>&
postop
_tensors_
;
const
std
::
array
<
Tensor
<
OutDataType
>
,
NumDTensor
>&
d
_tensors_
;
std
::
vector
<
index_t
>
conv_strides_
;
std
::
vector
<
index_t
>
conv_dilations_
;
...
...
@@ -141,19 +141,18 @@ struct ReferenceConvFwd : public device::BaseOperator
}
else
if
constexpr
(
NumDTensor
==
1
)
{
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
postop_tensors_
[
0
](
g
,
n
,
k
,
wo
));
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
d_tensors_
[
0
](
g
,
n
,
k
,
wo
));
}
else
if
constexpr
(
NumDTensor
==
2
)
{
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
postop
_tensors_
[
0
](
g
,
n
,
k
,
wo
),
arg
.
postop
_tensors_
[
1
](
g
,
n
,
k
,
wo
));
arg
.
d
_tensors_
[
0
](
g
,
n
,
k
,
wo
),
arg
.
d
_tensors_
[
1
](
g
,
n
,
k
,
wo
));
}
else
{
throw
std
::
runtime_error
(
"ElementOp not supported in reference."
);
throw
std
::
runtime_error
(
"
Output
ElementOp not supported in reference."
);
}
arg
.
output_
(
g
,
n
,
k
,
wo
)
=
v_out
;
};
...
...
@@ -216,18 +215,18 @@ struct ReferenceConvFwd : public device::BaseOperator
else
if
constexpr
(
NumDTensor
==
1
)
{
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
postop
_tensors_
[
0
](
g
,
n
,
k
,
ho
,
wo
));
v_out
,
v_acc_converted
,
arg
.
d
_tensors_
[
0
](
g
,
n
,
k
,
ho
,
wo
));
}
else
if
constexpr
(
NumDTensor
==
2
)
{
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
postop
_tensors_
[
0
](
g
,
n
,
k
,
ho
,
wo
),
arg
.
postop
_tensors_
[
1
](
g
,
n
,
k
,
ho
,
wo
));
arg
.
d
_tensors_
[
0
](
g
,
n
,
k
,
ho
,
wo
),
arg
.
d
_tensors_
[
1
](
g
,
n
,
k
,
ho
,
wo
));
}
else
{
throw
std
::
runtime_error
(
"ElementOp not supported in reference."
);
throw
std
::
runtime_error
(
"
Output
ElementOp not supported in reference."
);
}
arg
.
output_
(
g
,
n
,
k
,
ho
,
wo
)
=
v_out
;
};
...
...
@@ -303,18 +302,18 @@ struct ReferenceConvFwd : public device::BaseOperator
else
if
constexpr
(
NumDTensor
==
1
)
{
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
postop
_tensors_
[
0
](
g
,
n
,
k
,
d_o
,
ho
,
wo
));
v_out
,
v_acc_converted
,
arg
.
d
_tensors_
[
0
](
g
,
n
,
k
,
d_o
,
ho
,
wo
));
}
else
if
constexpr
(
NumDTensor
==
2
)
{
arg
.
out_element_op_
(
v_out
,
v_acc_converted
,
arg
.
postop
_tensors_
[
0
](
g
,
n
,
k
,
d_o
,
ho
,
wo
),
arg
.
postop
_tensors_
[
1
](
g
,
n
,
k
,
d_o
,
ho
,
wo
));
arg
.
d
_tensors_
[
0
](
g
,
n
,
k
,
d_o
,
ho
,
wo
),
arg
.
d
_tensors_
[
1
](
g
,
n
,
k
,
d_o
,
ho
,
wo
));
}
else
{
throw
std
::
runtime_error
(
"ElementOp not supported in reference."
);
throw
std
::
runtime_error
(
"
Output
ElementOp not supported in reference."
);
}
arg
.
output_
(
g
,
n
,
k
,
d_o
,
ho
,
wo
)
=
v_out
;
};
...
...
@@ -360,7 +359,7 @@ struct ReferenceConvFwd : public device::BaseOperator
InElementwiseOperation
in_element_op
,
WeiElementwiseOperation
wei_element_op
,
OutElementwiseOperation
out_element_op
,
const
std
::
array
<
Tensor
<
OutDataType
>
,
NumDTensor
>&
postop
_tensors
=
{})
const
std
::
array
<
Tensor
<
OutDataType
>
,
NumDTensor
>&
d
_tensors
=
{})
{
return
Argument
{
input
,
weight
,
...
...
@@ -372,7 +371,7 @@ struct ReferenceConvFwd : public device::BaseOperator
in_element_op
,
wei_element_op
,
out_element_op
,
postop
_tensors
};
d
_tensors
};
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
...
...
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleaddx2_relu_instance.hpp
View file @
c3738ce3
...
...
@@ -43,7 +43,7 @@ template <index_t NDimSpatial,
typename
DsLayout
,
typename
ELayout
,
ConvolutionForwardSpecialization
ConvSpec
>
using
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_bf16_instances
=
std
::
tuple
<
using
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_bf16_instances
=
std
::
tuple
<
// clang-format off
//########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
//########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
...
...
@@ -65,7 +65,7 @@ template <index_t NDimSpatial,
typename
DsLayout
,
typename
ELayout
,
ConvolutionForwardSpecialization
ConvSpec
>
using
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f16_instances
=
std
::
tuple
<
using
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f16_instances
=
std
::
tuple
<
// clang-format off
//########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
//########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
...
...
@@ -87,7 +87,7 @@ template <index_t NDimSpatial,
typename
DsLayout
,
typename
ELayout
,
ConvolutionForwardSpecialization
ConvSpec
>
using
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f32_instances
=
std
::
tuple
<
using
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f32_instances
=
std
::
tuple
<
// clang-format off
//########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
//########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
...
...
@@ -109,7 +109,7 @@ template <index_t NDimSpatial,
typename
DsLayout
,
typename
ELayout
,
ConvolutionForwardSpecialization
ConvSpec
>
using
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_int8_instances
=
std
::
tuple
<
using
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_int8_instances
=
std
::
tuple
<
// clang-format off
//########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| NumGemmK| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
//########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Prefetch| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MWaveMPerXdl| ScalarPerVector|
...
...
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd
x2
_relu.hpp
→
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd
_scaleadd
_relu.hpp
View file @
c3738ce3
...
...
@@ -5,6 +5,7 @@
#include <vector>
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
...
...
@@ -22,7 +23,7 @@ using ScaleAddScaleAddRelu = ck::tensor_operation::element_wise::ScaleAddScaleAd
#ifdef CK_ENABLE_BF16
// grouped conv3d forward, NDHWGC/GKZYXC/NDHWGK
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -38,7 +39,7 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_bf16
#endif
#ifdef CK_ENABLE_FP16
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -54,7 +55,7 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_f16_
#endif
#ifdef CK_ENABLE_FP32
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -70,7 +71,7 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_f32_
#endif
#ifdef CK_ENABLE_INT8
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -135,7 +136,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
WeiDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
{
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
op_ptrs
);
}
#endif
...
...
@@ -143,7 +144,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
WeiDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
&&
is_same_v
<
ComputeType
,
half_t
>
)
{
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
op_ptrs
);
}
#endif
...
...
@@ -151,7 +152,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
WeiDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
{
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances
(
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances
(
op_ptrs
);
}
#endif
...
...
@@ -159,7 +160,7 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
WeiDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
{
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances
(
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances
(
op_ptrs
);
}
#endif
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt
0 → 100644
View file @
c3738ce3
set
(
GROUPED_CONV3D_FWD_scaleadd_scaleadd_RELU
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
)
add_instance_library
(
device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance
${
GROUPED_CONV3D_FWD_scaleadd_scaleadd_RELU
}
)
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
x2
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
_scaleadd
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
View file @
c3738ce3
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
...
...
@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -25,28 +25,28 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_bf16
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_bf16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_bf16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_bf16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_bf16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_bf16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_bf16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
x2
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
_scaleadd
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
View file @
c3738ce3
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
...
...
@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -25,28 +25,28 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_f16_
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f16_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
x2
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
_scaleadd
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
View file @
c3738ce3
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
...
...
@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -25,28 +25,28 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_f32_
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f32_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f32_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f32_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f32_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_f32_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_f32_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
x2
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
→
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd
_scaleadd
_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
View file @
c3738ce3
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
x2
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances
(
void
add_device_grouped_conv3d_fwd_xdl_scaleadd
_scaleadd
_relu_ndhwgc_gkzyxc_ndhwgk_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
3
,
NDHWGC
,
GKZYXC
,
...
...
@@ -24,28 +24,28 @@ void add_device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_int8
{
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_int8_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_int8_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwdDefault
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_int8_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_int8_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv_fwd_xdl_scaleadd
x2
_relu_int8_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
device_grouped_conv_fwd_xdl_scaleadd
_scaleadd
_relu_int8_instances
<
3
,
NDHWGC
,
GKZYXC
,
ck
::
Tuple
<
NDHWGK
,
NDHWGK
>
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
}
}
// namespace instance
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleaddx2_relu/CMakeLists.txt
deleted
100644 → 0
View file @
eb898ad6
# Translation units providing the grouped 3D forward-convolution
# "scale-add x2 + ReLU" device instances (NDHWGC / GKZYXC / NDHWGK layouts),
# one .cpp per supported element type: bf16, f16, f32, int8.
set(GROUPED_CONV3D_FWD_SCALEADDX2_RELU
    xdl/device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
    xdl/device_grouped_conv3d_fwd_xdl_scaleaddx2_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp)

# Build the sources above into the instance library target consumed by the
# operation-instance factory (add_instance_library is a project-level helper).
add_instance_library(device_grouped_conv3d_fwd_scaleaddx2_relu_instance
                     ${GROUPED_CONV3D_FWD_SCALEADDX2_RELU})
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment