gaoqiong / composable_kernel · Commits · 6257e347

Commit 6257e347, authored Jul 02, 2022 by Chao Liu

clean

parent ac876c6f

Changes (10): showing 10 changed files with 43 additions and 59 deletions (+43, -59)
include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp   +1 -1
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp   +1 -1
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp   +1 -1
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp   +1 -1
include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp   +2 -1
include/ck/tensor_operation/gpu/device/gemm_specialization.hpp   +16 -0
include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp   +0 -33
profiler/include/profile_gemm_bilinear_impl.hpp   +12 -12
profiler/src/profile_gemm_add_add_fastgelu.cpp   +1 -1
profiler/src/profile_gemm_bilinear.cpp   +8 -8
include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp

@@ -18,7 +18,7 @@ enum struct ConvolutionForwardSpecialization
     OddC,
 };
 
-inline std::string getConvFwdSpecializationStr(const ConvolutionForwardSpecialization& s)
+inline std::string getConvForwardSpecializationString(const ConvolutionForwardSpecialization& s)
 {
     switch(s)
     {
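The hunk shows only the rename of the helper's signature; the body is elided. Judging from the switch(s) opener and the analogous getGemmSpecializationString added in gemm_specialization.hpp below, the function presumably maps each enumerator to its name. A minimal sketch, assuming the enum's other enumerators (only OddC is visible in this hunk):

    // Sketch only: the hunk shows just the signature and "switch(s)".
    // Enumerators other than OddC are assumed, mirroring the GEMM helper below.
    inline std::string getConvForwardSpecializationString(const ConvolutionForwardSpecialization& s)
    {
        switch(s)
        {
        case ConvolutionForwardSpecialization::Default: return "Default"; // assumed
        case ConvolutionForwardSpecialization::OddC: return "OddC";
        default: return "Unrecognized specialization!";
        }
    }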
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp

@@ -871,7 +871,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
             << MPerBlock << ", "
             << NPerBlock << ", "
             << K0PerBlock << ", "
-            << getConvFwdSpecializationStr(ConvForwardSpecialization)
+            << getConvForwardSpecializationString(ConvForwardSpecialization)
             << ">";
         // clang-format on
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp

@@ -711,7 +711,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
             << MPerBlock << ", "
             << NPerBlock << ", "
             << K0PerBlock << ", "
-            << getConvFwdSpecializationStr(ConvForwardSpecialization)
+            << getConvForwardSpecializationString(ConvForwardSpecialization)
             << ">";
         // clang-format on
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp

@@ -1033,7 +1033,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
             << MPerBlock << ", "
             << NPerBlock << ", "
             << K0PerBlock << ", "
-            << getConvFwdSpecializationStr(ConvForwardSpecialization)
+            << getConvForwardSpecializationString(ConvForwardSpecialization)
             << ">";
         // clang-format on
include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp

@@ -746,7 +746,8 @@ struct DeviceGemmMultipleD_Xdl_CShuffle : public DeviceGemmMultipleD<ALayout,
             << NPerBlock << ", "
             << KPerBlock << ", "
             << AK1 << ", "
-            << BK1
+            << BK1 << ", "
+            << getGemmSpecializationString(GemmSpec)
             << ">";
         // clang-format on
include/ck/tensor_operation/gpu/device/gemm_specialization.hpp

@@ -19,6 +19,22 @@ enum struct GemmSpecialization
     MNKPadding,
 };
 
+inline std::string getGemmSpecializationString(const GemmSpecialization& s)
+{
+    switch(s)
+    {
+    case GemmSpecialization::Default: return "Default";
+    case GemmSpecialization::MPadding: return "MPadding";
+    case GemmSpecialization::NPadding: return "NPadding";
+    case GemmSpecialization::KPadding: return "KPadding";
+    case GemmSpecialization::MNPadding: return "MNPadding";
+    case GemmSpecialization::MKPadding: return "MKPadding";
+    case GemmSpecialization::NKPadding: return "NKPadding";
+    case GemmSpecialization::MNKPadding: return "MNKPadding";
+    default: return "Unrecognized specialization!";
+    }
+}
+
 } // namespace device
 } // namespace tensor_operation
 } // namespace ck
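This new helper is what the device_gemm_multiple_d_xdl_cshuffle.hpp hunk above streams into the device op's type string. A minimal usage sketch, assuming the helper and GemmSpecialization are in scope (the makeTypeString name and the literal block sizes are illustrative, not from the commit):

    #include <sstream>
    #include <string>

    // Illustrative only: composes the new helper into a readable device-op
    // description, following the DeviceGemmMultipleD_Xdl_CShuffle pattern.
    std::string makeTypeString(const GemmSpecialization& gemm_spec)
    {
        std::ostringstream str;
        str << "DeviceGemmMultipleD_Xdl_CShuffle"
            << "<" << 256 << ", " << 128 << ", " // stand-in tuning parameters
            << getGemmSpecializationString(gemm_spec) << ">";
        return str.str();
    }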
include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp

@@ -35,7 +35,6 @@ struct Add
         y = type_convert<half_t>(x0) + x1;
     };
 
-    // Question: should half_t be supported ?
     template <>
     __host__ __device__ constexpr void
     operator()<half_t>(half_t& y, const half_t& x0, const half_t& x1) const

@@ -43,7 +42,6 @@ struct Add
         y = x0 + x1;
     };
 
-    // Question: should bhalf_t be supported ?
     template <>
     __host__ __device__ constexpr void
     operator()<bhalf_t>(bhalf_t& y, const bhalf_t& x0, const bhalf_t& x1) const

@@ -74,7 +72,6 @@ struct Subtract
         y = x0 - x1;
     };
 
-    // Question: should half_t be supported ?
     template <>
     __host__ __device__ constexpr void
     operator()<half_t>(half_t& y, const half_t& x0, const half_t& x1) const

@@ -82,7 +79,6 @@ struct Subtract
         y = x0 - x1;
     };
 
-    // Question: should bhalf_t be supported ?
     template <>
     __host__ __device__ constexpr void
     operator()<bhalf_t>(bhalf_t& y, const bhalf_t& x0, const bhalf_t& x1) const

@@ -98,32 +94,6 @@ struct Bilinear
 {
     Bilinear(float alpha, float beta) : alpha_(alpha), beta_(beta){};
 
-#if 0
-    template <typename T>
-    __host__ __device__ constexpr void operator()(T& y, const T& x0, const T& x1) const;
-
-    template <>
-    __host__ __device__ constexpr void
-    operator()<float>(float& y, const float& x0, const float& x1) const
-    {
-        y = alpha_ * x0 + beta_ * x1;
-    };
-
-    template <>
-    __host__ __device__ constexpr void
-    operator()<double>(double& y, const double& x0, const double& x1) const
-    {
-        y = type_convert<double>(alpha_) * x0 + type_convert<double>(beta_) * x1;
-    };
-
-    template <>
-    __host__ __device__ constexpr void
-    operator()<half_t>(half_t& y, const half_t& x0, const half_t& x1) const
-    {
-        y = type_convert<half_t>(alpha_ * type_convert<float>(x0) +
-                                 beta_ * type_convert<float>(x1));
-    };
-#else
     template <typename Y, typename X0, typename X1>
     __host__ __device__ constexpr void operator()(Y&, const X0&, const X1&) const;

@@ -140,7 +110,6 @@ struct Bilinear
     {
         y = type_convert<half_t>(alpha_ * x0 + beta_ * ck::type_convert<float>(x1));
     };
-#endif
 
     float alpha_;
     float beta_;

@@ -167,7 +136,6 @@ struct AddRelu
         y = a > 0.0 ? a : 0.0;
     };
 
-    // Question: should half_t be supported ?
     template <>
     __host__ __device__ constexpr void
     operator()<half_t>(half_t& y, const half_t& x0, const half_t& x1) const

@@ -202,7 +170,6 @@ struct AddHardswish
         y = c;
     };
 
-    // Question: should half_t be supported ?
     template <>
     __host__ __device__ constexpr void
     operator()<half_t>(half_t& y, const half_t& x0, const half_t& x1) const
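Net effect on Bilinear: the dead #if 0 / #else / #endif scaffolding is removed and only the three-type-parameter form survives. A self-contained sketch of that surviving shape, assuming ck's half_t and type_convert utilities are available; the <half_t, float, half_t> argument list on the specialization is inferred from the body visible in the @@ -140 hunk, not stated by the diff:

    // Sketch of Bilinear after the cleanup (not the verbatim file). In-class
    // explicit specialization of a member template relies on the clang/hip
    // compiler CK builds with.
    struct Bilinear
    {
        Bilinear(float alpha, float beta) : alpha_(alpha), beta_(beta) {}

        // Generic declaration; only explicit specializations are defined.
        template <typename Y, typename X0, typename X1>
        __host__ __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const;

        // Specialization visible in the hunk: float accumulator in, half_t out.
        template <>
        __host__ __device__ constexpr void
        operator()<ck::half_t, float, ck::half_t>(ck::half_t& y,
                                                  const float& x0,
                                                  const ck::half_t& x1) const
        {
            y = ck::type_convert<ck::half_t>(alpha_ * x0 + beta_ * ck::type_convert<float>(x1));
        }

        float alpha_; // scale on the GEMM result
        float beta_;  // scale on the residual input
    };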
profiler/include/profile_gemm_bilinear_impl.hpp

This hunk is a whitespace-only re-indentation of the parameter list; both sides read identically:

@@ -30,18 +30,18 @@ template <typename ADataType,
           typename BLayout,
           typename DELayout> // assume Ds and E have same layout
 bool profile_gemm_bilinear_impl(int do_verification,
-                                int init_method,
-                                bool /*do_log*/,
-                                bool time_kernel,
-                                int M,
-                                int N,
-                                int K,
-                                int StrideA,
-                                int StrideB,
-                                int StrideD,
-                                int StrideE,
-                                float alpha,
-                                float beta)
+                                int init_method,
+                                bool /*do_log*/,
+                                bool time_kernel,
+                                int M,
+                                int N,
+                                int K,
+                                int StrideA,
+                                int StrideB,
+                                int StrideD,
+                                int StrideE,
+                                float alpha,
+                                float beta)
 {
     auto f_host_tensor_descriptor =
         [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
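The hunk cuts off at the f_host_tensor_descriptor lambda. In CK's profiler impls this helper typically builds a HostTensorDescriptor whose strides follow the layout tag; a sketch under that assumption (the body below is the common CK pattern, not text from this hunk):

    // Assumed body of the helper: a row-major M x N matrix gets strides
    // {stride, 1}; a column-major one gets {1, stride}.
    auto f_host_tensor_descriptor =
        [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
            using namespace ck::tensor_layout::gemm;
            if constexpr(ck::is_same_v<decltype(layout), RowMajor>)
            {
                return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
                                            std::vector<std::size_t>({stride, 1}));
            }
            else
            {
                return HostTensorDescriptor(std::vector<std::size_t>({row, col}),
                                            std::vector<std::size_t>({1, stride}));
            }
        };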
profiler/src/profile_gemm_add_add_fastgelu.cpp

@@ -29,7 +29,7 @@ int profile_gemm_add_add_fastgelu(int argc, char* argv[])
     if(argc != 16)
     {
         // clang-format off
-        printf("arg1: tensor operation (gemm_add_add_fastgelu: GEMM+Add+Add+GeLU)\n");
+        printf("arg1: tensor operation (gemm_add_add_fastgelu: GEMM+Add+Add+FastGeLU)\n");
         printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
         printf("arg3: matrix layout (0: E[m, n] = FastGeLU(A[m, k] * B[k, n] + D0[m, n] + D1[m, n]);\n");
         printf("                     1: E[m, n] = FastGeLU(A[m, k] * B[n, k] + D0[m, n] + D1[m, n]);\n");
profiler/src/profile_gemm_bilinear.cpp

@@ -29,7 +29,7 @@ int profile_gemm_bilinear(int argc, char* argv[])
     if(argc != 17)
     {
         // clang-format off
-        printf("arg1: tensor operation (gemm_add_add_fastgelu: GEMM+Add+Add+GeLU)\n");
+        printf("arg1: tensor operation (gemm_bilinear: GEMM+Bilinear)\n");
         printf("arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8)\n");
         printf("arg3: matrix layout (0: E[m, n] = alpha * A[m, k] * B[k, n] + beta * D[m, n];\n");
         printf("                     1: E[m, n] = alpha * A[m, k] * B[n, k] + beta * D[m, n];\n");

The second hunk is a whitespace-only re-alignment of the call's template argument list; both sides read identically:

@@ -94,13 +94,13 @@ int profile_gemm_bilinear(int argc, char* argv[])
     const int DefaultStrideE = ck::is_same_v<DELayout, Row> ? N : M;
 
     bool pass = ck::profiler::profile_gemm_bilinear_impl<ADataType,
-                                                         BDataType,
-                                                         AccDataType,
-                                                         DDataType,
-                                                         EDataType,
-                                                         ALayout,
-                                                         BLayout,
-                                                         DELayout>(
+                                                         BDataType,
+                                                         AccDataType,
+                                                         DDataType,
+                                                         EDataType,
+                                                         ALayout,
+                                                         BLayout,
+                                                         DELayout>(
         do_verification,
         init_method,
         do_log,
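For context, the DefaultStrideE line in the hunk above encodes the usual default-leading-stride rule for an M x N matrix. A standalone restatement (hypothetical helper, not part of the commit; Row is the file's alias for the row-major layout tag):

    // Hypothetical helper illustrating the DefaultStrideE rule:
    // a row-major M x N matrix has leading stride N, a column-major one M.
    template <typename Layout>
    int default_leading_stride(int M, int N)
    {
        return ck::is_same_v<Layout, Row> ? N : M;
    }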