Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
09d4c3a4
You need to sign in or sign up before continuing.
Commit
09d4c3a4
authored
Oct 01, 2024
by
illsilin
Browse files
merge from public repo
parents
171ed358
8e4c3fb1
Changes
202
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1747 additions
and
18 deletions
+1747
-18
profiler/src/profile_grouped_conv_fwd.cpp
profiler/src/profile_grouped_conv_fwd.cpp
+19
-0
profiler/src/profile_max_pool2d_bwd.cpp
profiler/src/profile_max_pool2d_bwd.cpp
+178
-0
profiler/src/profile_max_pool2d_fwd.cpp
profiler/src/profile_max_pool2d_fwd.cpp
+310
-0
profiler/src/profile_pool3d_fwd.cpp
profiler/src/profile_pool3d_fwd.cpp
+331
-0
script/cmake-ck-dev.sh
script/cmake-ck-dev.sh
+3
-0
script/cmake-ck-release.sh
script/cmake-ck-release.sh
+3
-0
script/convert_miopen_driver_to_profiler.py
script/convert_miopen_driver_to_profiler.py
+2
-0
test/CMakeLists.txt
test/CMakeLists.txt
+1
-0
test/ck_tile/CMakeLists.txt
test/ck_tile/CMakeLists.txt
+1
-0
test/ck_tile/image_to_column/CMakeLists.txt
test/ck_tile/image_to_column/CMakeLists.txt
+4
-0
test/ck_tile/image_to_column/test_tile_image_to_column.cpp
test/ck_tile/image_to_column/test_tile_image_to_column.cpp
+142
-0
test/gemm_universal/test_gemm_universal_ut_cases.inc
test/gemm_universal/test_gemm_universal_ut_cases.inc
+128
-0
test/gemm_universal/test_gemm_universal_xdl.cpp
test/gemm_universal/test_gemm_universal_xdl.cpp
+25
-0
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+3
-1
test/pool/CMakeLists.txt
test/pool/CMakeLists.txt
+12
-0
test/pool/test_avg_pool2d_bwd.cpp
test/pool/test_avg_pool2d_bwd.cpp
+133
-0
test/pool/test_avg_pool2d_fwd.cpp
test/pool/test_avg_pool2d_fwd.cpp
+145
-0
test/pool/test_avg_pool3d_fwd.cpp
test/pool/test_avg_pool3d_fwd.cpp
+18
-17
test/pool/test_max_pool2d_bwd.cpp
test/pool/test_max_pool2d_bwd.cpp
+139
-0
test/pool/test_max_pool2d_fwd.cpp
test/pool/test_max_pool2d_fwd.cpp
+150
-0
No files found.
profiler/src/profile_grouped_conv_fwd.cpp
View file @
09d4c3a4
...
...
@@ -15,6 +15,7 @@ enum struct ConvLayout
{
GNHWC_GKYXC_GNHWK
,
// 0
NHWGC_GKYXC_NHWGK
,
// 1
NGCHW_GKYXC_NGKHW
,
// 2
};
enum
struct
ConvDataType
...
...
@@ -54,6 +55,8 @@ static void print_helper_msg()
<<
"arg3: indexing data type (0: 32-bit, 1: 64-bit)
\n
"
<<
"arg4: tensor layout (0: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, N, Ho, Wo, K]
\n
"
<<
" 1: Input[N, Hi, Wi, G, C], Weight[G, K, Y, X, C], Output[N, Ho, Wo, G, K])
\n
"
<<
" 2: Input[N, G, C, Hi, Wi], Weight[G, K, Y, X, C], Output[N, "
"G, K, Ho, Wo]
\n
"
<<
"arg5: verification (0: no, 1: yes)
\n
"
<<
"arg6: initialization (0: no init, 1: integer value, 2: decimal value)
\n
"
<<
"arg7: print tensor value (0: no; 1: yes)
\n
"
...
...
@@ -111,6 +114,11 @@ int profile_grouped_conv_fwd(int argc, char* argv[])
using
GNHWK
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
GNDHWK
=
ck
::
tensor_layout
::
convolution
::
GNDHWK
;
//
using
NGCHW
=
ck
::
tensor_layout
::
convolution
::
NGCHW
;
using
NGKHW
=
ck
::
tensor_layout
::
convolution
::
NGKHW
;
//
using
NWGC
=
ck
::
tensor_layout
::
convolution
::
NWGC
;
using
NHWGC
=
ck
::
tensor_layout
::
convolution
::
NHWGC
;
...
...
@@ -284,6 +292,17 @@ int profile_grouped_conv_fwd(int argc, char* argv[])
return
profile
(
I2
,
NHWGC
{},
GKYXC
{},
NHWGK
{},
INT8
{},
INT8
{},
INT8
{},
INT8
{},
INT8
{});
}
}
else
if
(
num_dim_spatial
==
2
&&
layout
==
ConvLayout
::
NGCHW_GKYXC_NGKHW
)
{
if
(
data_type
==
ConvDataType
::
F32_F32_F32
)
{
return
profile
(
I2
,
NGCHW
{},
GKYXC
{},
NGKHW
{},
F32
{},
F32
{},
F32
{},
F32
{},
F32
{});
}
else
if
(
data_type
==
ConvDataType
::
F16_F16_F16
)
{
return
profile
(
I2
,
NGCHW
{},
GKYXC
{},
NGKHW
{},
F16
{},
F16
{},
F16
{},
F16
{},
F16
{});
}
}
else
if
(
num_dim_spatial
==
3
&&
layout
==
ConvLayout
::
NHWGC_GKYXC_NHWGK
)
{
if
(
data_type
==
ConvDataType
::
F32_F32_F32
)
...
...
profiler/src/profile_max_pool2d_bwd.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <vector>
#include <unordered_map>
#include "profiler/data_type_enum.hpp"
#include "profiler/profile_max_pool2d_bwd_impl.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "profiler_operation_registry.hpp"
using
ck
::
index_t
;
struct
maxPoolbwdArgParser
{
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int
>>
long_opts
=
{{
"length"
,
{}},
{
"wsize"
,
{}},
{
"wstride"
,
{}},
{
"wdilation"
,
{}},
{
"pad1"
,
{}},
{
"pad2"
,
{}}};
bool
parse_opt
(
int
argc
,
char
*
argv
[],
const
std
::
string
&
key
,
int
i
)
{
if
(
std
::
string
(
"--"
)
+
key
==
argv
[
i
])
{
int
pos
=
i
;
while
(
++
i
<
argc
&&
argv
[
i
][
0
]
!=
'-'
)
{}
int
end
=
i
;
for
(
int
j
=
pos
+
1
;
j
<
end
;
j
++
)
{
long_opts
[
key
].
push_back
(
std
::
stoi
(
argv
[
j
]));
}
return
true
;
}
return
false
;
}
void
operator
()(
int
argc
,
char
*
argv
[])
{
for
(
auto
&
kv
:
long_opts
)
{
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
if
(
parse_opt
(
argc
,
argv
,
kv
.
first
,
i
))
break
;
}
}
}
};
void
print_help_max_pool2d_bwd
()
{
std
::
cout
<<
"arg1: data type (0: fp16; 1: fp32; 3: int8; 5: bf16)
\n
"
<<
"arg2: verification (0: no; 1: yes)
\n
"
<<
"arg3: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
<<
"arg4: print tensor value (0: no; 1: yes)
\n
"
<<
"arg5: time kernel (0=no, 1=yes)
\n
"
<<
"--length: input tensor length for NCHW(e.g, --length 2 32 30 30)
\n
"
<<
"--wsize: window size for YX (e.g, --wsize 2 2)
\n
"
<<
"--wstride: window stride for HW (e.g, --wstride 2 2)
\n
"
<<
"--wdilation: window dilation for HW (e.g, --wdilation 1 1)
\n
"
<<
"--pad1: left side of padding in HW (e.g, --pad1 1 1)
\n
"
<<
"--pad2: right side of padding in HW (e.g, --pad2 1 1)
\n
"
<<
"eg: ckProfiler max_pool2d_bwd 0 1 2 0 --length 2 32 30 30 --wsize 2 2 "
"--wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1"
<<
std
::
endl
;
}
int
profile_max_pool2d_bwd
(
int
argc
,
char
*
argv
[])
{
ck
::
DataTypeEnum
data_type
=
ck
::
DataTypeEnum
::
Half
;
bool
do_verification
=
true
;
int
init_method
=
2
;
bool
do_log
=
false
;
bool
time_kernel
=
true
;
std
::
vector
<
index_t
>
in_length
=
{
2
,
32
,
30
,
30
};
std
::
vector
<
index_t
>
wsize
=
{
2
,
2
};
std
::
vector
<
index_t
>
wstride
=
{
2
,
2
};
std
::
vector
<
index_t
>
wdilation
=
{
1
,
1
};
std
::
vector
<
index_t
>
pad1
=
{
1
,
1
};
std
::
vector
<
index_t
>
pad2
=
{
1
,
1
};
if
(
argc
!=
2
&&
argc
!=
33
)
{
print_help_max_pool2d_bwd
();
return
0
;
}
else
if
(
argc
==
33
)
{
data_type
=
static_cast
<
ck
::
DataTypeEnum
>
(
std
::
stoi
(
argv
[
2
]));
do_verification
=
std
::
stoi
(
argv
[
3
]);
init_method
=
std
::
stoi
(
argv
[
4
]);
do_log
=
std
::
stoi
(
argv
[
5
]);
time_kernel
=
std
::
stoi
(
argv
[
6
]);
// parse the long options
maxPoolbwdArgParser
arg_parser
;
arg_parser
(
argc
,
argv
);
in_length
=
arg_parser
.
long_opts
[
"length"
];
wsize
=
arg_parser
.
long_opts
[
"wsize"
];
wstride
=
arg_parser
.
long_opts
[
"wstride"
];
wdilation
=
arg_parser
.
long_opts
[
"wdilation"
];
pad1
=
arg_parser
.
long_opts
[
"pad1"
];
pad2
=
arg_parser
.
long_opts
[
"pad2"
];
}
using
F16
=
ck
::
half_t
;
using
BF16
=
ck
::
bhalf_t
;
using
F32
=
float
;
using
I8
=
int8_t
;
using
I32
=
int32_t
;
if
(
data_type
==
ck
::
DataTypeEnum
::
Half
)
{
ck
::
profiler
::
profile_max_pool2d_bwd_impl
<
F16
,
F16
,
I32
,
F16
,
F16
,
false
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
BFloat16
)
{
ck
::
profiler
::
profile_max_pool2d_bwd_impl
<
BF16
,
BF16
,
I32
,
BF16
,
BF16
,
false
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Float
)
{
ck
::
profiler
::
profile_max_pool2d_bwd_impl
<
F32
,
F32
,
I32
,
F32
,
F32
,
false
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Int8
)
{
ck
::
profiler
::
profile_max_pool2d_bwd_impl
<
I8
,
I8
,
I32
,
I8
,
I8
,
false
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
{
throw
std
::
runtime_error
(
"not implemented yet"
);
}
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"max_pool2d_bwd"
,
"max_pool2d bwd"
,
profile_max_pool2d_bwd
);
profiler/src/profile_max_pool
3
d_fwd.cpp
→
profiler/src/profile_max_pool
2
d_fwd.cpp
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 20
18-2023
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 20
24
, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <vector>
#include <unordered_map>
#include "profiler/data_type_enum.hpp"
#include "profiler/profile_pool
3
d_fwd_impl.hpp"
#include "profiler/profile_pool
2
d_fwd_impl.hpp"
#include "profiler_operation_registry.hpp"
using
ck
::
index_t
;
...
...
@@ -49,49 +49,58 @@ struct maxPoolFwdArgParser
}
};
void
print_help_max_pool3d_fwd
()
enum
struct
PoolDataType
{
std
::
cout
<<
"arg1: data type (0: fp16; 1: fp32; 5: bf16)
\n
"
F32
=
0
,
BF16
,
F16
,
INT8
,
F8
,
};
void
print_help_max_pool2d_fwd
()
{
std
::
cout
<<
"arg1: data type (0: fp16; 1: fp32; 2: bf16; 3: int8; 4: fp8)
\n
"
<<
"arg2: verification (0: no; 1: yes)
\n
"
<<
"arg3: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
<<
"arg4: print tensor value (0: no; 1: yes)
\n
"
<<
"arg5: time kernel (0=no, 1=yes)
\n
"
<<
"arg6: return index (0=no, 1=yes)
\n
"
<<
"--length: input tensor length for NC
D
HW(e.g, --length 2 32 30
30
30)
\n
"
<<
"--wsize: window size for
Z
YX (e.g, --wsize 2
2
2)
\n
"
<<
"--wstride: window stride for
D
HW (e.g, --wstride 2
2
2)
\n
"
<<
"--wdilation: window dilation for
D
HW (e.g, --wdilation 1
1
1)
\n
"
<<
"--pad1: left side of padding in
D
HW (e.g, --pad1 1
1
1)
\n
"
<<
"--pad2: right side of padding in
D
HW (e.g, --pad2 1
1
1)
\n
"
<<
"eg: ckProfiler max_pool
3
d_fwd 0 1 2 0 1 0 --length 2 32 30 30
30
--wsize 2 2
2
"
"--wstride 2 2
2
--wdilation 1 1
1
--pad1 1 1
1
--pad2 1
1
1"
<<
"--length: input tensor length for NCHW(e.g, --length 2 32 30 30)
\n
"
<<
"--wsize: window size for YX (e.g, --wsize 2 2)
\n
"
<<
"--wstride: window stride for HW (e.g, --wstride 2 2)
\n
"
<<
"--wdilation: window dilation for HW (e.g, --wdilation 1 1)
\n
"
<<
"--pad1: left side of padding in HW (e.g, --pad1 1 1)
\n
"
<<
"--pad2: right side of padding in HW (e.g, --pad2 1 1)
\n
"
<<
"eg: ckProfiler max_pool
2
d_fwd 0 1 2 0 1 0 --length 2 32 30 30 --wsize 2 2"
"--wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1"
<<
std
::
endl
;
}
int
profile_max_pool
3
d_fwd
(
int
argc
,
char
*
argv
[])
int
profile_max_pool
2
d_fwd
(
int
argc
,
char
*
argv
[])
{
ck
::
DataType
Enum
data_type
=
ck
::
DataType
Enum
::
Half
;
bool
do_verification
=
true
;
int
init_method
=
0
;
bool
do_log
=
false
;
bool
time_kernel
=
true
;
bool
return_index
=
false
;
std
::
vector
<
index_t
>
in_length
=
{
2
,
32
,
30
,
30
,
30
};
std
::
vector
<
index_t
>
wsize
=
{
2
,
2
,
2
};
std
::
vector
<
index_t
>
wstride
=
{
2
,
2
,
2
};
std
::
vector
<
index_t
>
wdilation
=
{
1
,
1
,
1
};
std
::
vector
<
index_t
>
pad1
=
{
1
,
1
,
1
};
std
::
vector
<
index_t
>
pad2
=
{
1
,
1
,
1
};
if
(
argc
!=
2
&&
argc
!=
34
)
Pool
DataType
data_type
=
Pool
DataType
::
F32
;
bool
do_verification
=
true
;
int
init_method
=
0
;
bool
do_log
=
false
;
bool
time_kernel
=
true
;
bool
return_index
=
false
;
std
::
vector
<
index_t
>
in_length
=
{
2
,
32
,
30
,
30
};
std
::
vector
<
index_t
>
wsize
=
{
2
,
2
};
std
::
vector
<
index_t
>
wstride
=
{
2
,
2
};
std
::
vector
<
index_t
>
wdilation
=
{
1
,
1
};
std
::
vector
<
index_t
>
pad1
=
{
1
,
1
};
std
::
vector
<
index_t
>
pad2
=
{
1
,
1
};
if
(
argc
!=
2
&&
argc
!=
28
)
{
print_help_max_pool
3
d_fwd
();
print_help_max_pool
2
d_fwd
();
return
0
;
}
else
if
(
argc
==
34
)
else
if
(
argc
==
28
)
{
data_type
=
static_cast
<
ck
::
DataType
Enum
>
(
std
::
stoi
(
argv
[
2
]));
data_type
=
static_cast
<
Pool
DataType
>
(
std
::
stoi
(
argv
[
2
]));
do_verification
=
std
::
stoi
(
argv
[
3
]);
init_method
=
std
::
stoi
(
argv
[
4
]);
do_log
=
std
::
stoi
(
argv
[
5
]);
...
...
@@ -109,32 +118,22 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
pad2
=
arg_parser
.
long_opts
[
"pad2"
];
}
#ifdef CK_ENABLE_FP16
using
F16
=
ck
::
half_t
;
#endif
#ifdef CK_ENABLE_BF16
using
F16
=
ck
::
half_t
;
using
BF16
=
ck
::
bhalf_t
;
#endif
#ifdef CK_ENABLE_FP32
using
F32
=
float
;
#endif
using
I32
=
int32_t
;
using
NDHWC
=
ck
::
tensor_layout
::
convolution
::
NDHWC
;
#if 1
using
F32
=
float
;
using
I32
=
int32_t
;
using
F8
=
ck
::
f8_t
;
using
I8
=
int8_t
;
using
NHWC
=
ck
::
tensor_layout
::
convolution
::
NHWC
;
constexpr
auto
ReduceOpId
=
ck
::
ReduceTensorOp
::
MAX
;
#else
constexpr
auto
ReduceOpId
=
ck
::
ReduceTensorOp
::
AVG
;
#endif
if
(
false
)
;
#ifdef CK_ENABLE_FP16
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Half
)
if
(
data_type
==
PoolDataType
::
F16
)
{
if
(
return_index
)
{
ck
::
profiler
::
profile_pool
3
d_fwd_impl
<
F16
,
F16
,
F16
,
I32
,
N
D
HWC
,
N
D
HWC
,
ReduceOpId
,
false
,
true
>
(
profile_pool
2
d_fwd_impl
<
F16
,
F16
,
F16
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
true
>
(
do_verification
,
init_method
,
do_log
,
...
...
@@ -145,9 +144,11 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
wdilation
,
pad1
,
pad2
);
}
else
{
ck
::
profiler
::
profile_pool
3
d_fwd_impl
<
F16
,
F16
,
F16
,
I32
,
N
D
HWC
,
N
D
HWC
,
ReduceOpId
,
false
,
false
>
(
profile_pool
2
d_fwd_impl
<
F16
,
F16
,
F16
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
false
>
(
do_verification
,
init_method
,
do_log
,
...
...
@@ -158,37 +159,33 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
wdilation
,
pad1
,
pad2
);
}
}
#endif
#ifdef CK_ENABLE_BF16
else
if
(
data_type
==
ck
::
DataTypeEnum
::
BFloat16
)
else
if
(
data_type
==
PoolDataType
::
BF16
)
{
if
(
return_index
)
ck
::
profiler
::
profile_pool3d_fwd_impl
<
BF16
,
BF16
,
BF16
,
I32
,
NDHWC
,
NDHWC
,
ReduceOpId
,
false
,
true
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
{
ck
::
profiler
::
profile_pool2d_fwd_impl
<
BF16
,
BF16
,
BF16
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
true
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
ck
::
profiler
::
profile_pool3d_fwd_impl
<
BF16
,
{
ck
::
profiler
::
profile_pool2d_fwd_impl
<
BF16
,
BF16
,
BF16
,
I32
,
N
D
HWC
,
N
D
HWC
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
false
>
(
do_verification
,
...
...
@@ -201,14 +198,14 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
wdilation
,
pad1
,
pad2
);
}
}
#endif
#ifdef CK_ENABLE_FP32
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Float
)
else
if
(
data_type
==
PoolDataType
::
F32
)
{
if
(
return_index
)
{
ck
::
profiler
::
profile_pool
3
d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
N
D
HWC
,
N
D
HWC
,
ReduceOpId
,
false
,
true
>
(
profile_pool
2
d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
true
>
(
do_verification
,
init_method
,
do_log
,
...
...
@@ -219,9 +216,11 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
wdilation
,
pad1
,
pad2
);
}
else
{
ck
::
profiler
::
profile_pool
3
d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
N
D
HWC
,
N
D
HWC
,
ReduceOpId
,
false
,
false
>
(
profile_pool
2
d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
false
>
(
do_verification
,
init_method
,
do_log
,
...
...
@@ -232,8 +231,74 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
wdilation
,
pad1
,
pad2
);
}
}
else
if
(
data_type
==
PoolDataType
::
INT8
)
{
if
(
return_index
)
{
ck
::
profiler
::
profile_pool2d_fwd_impl
<
I8
,
I8
,
F32
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
true
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
{
ck
::
profiler
::
profile_pool2d_fwd_impl
<
I8
,
I8
,
F32
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
false
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
}
else
if
(
data_type
==
PoolDataType
::
F8
)
{
if
(
return_index
)
{
ck
::
profiler
::
profile_pool2d_fwd_impl
<
F8
,
F8
,
F32
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
true
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
else
{
ck
::
profiler
::
profile_pool2d_fwd_impl
<
F8
,
F8
,
F32
,
I32
,
NHWC
,
NHWC
,
ReduceOpId
,
false
,
false
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
in_length
,
wsize
,
wstride
,
wdilation
,
pad1
,
pad2
);
}
}
#endif
else
{
throw
std
::
runtime_error
(
"not implemented yet"
);
...
...
@@ -242,4 +307,4 @@ int profile_max_pool3d_fwd(int argc, char* argv[])
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"max_pool
3
d_fwd"
,
"max_pool
3
d fwd"
,
profile_max_pool
3
d_fwd
);
REGISTER_PROFILER_OPERATION
(
"max_pool
2
d_fwd"
,
"max_pool
2
d fwd"
,
profile_max_pool
2
d_fwd
);
profiler/src/profile_pool3d_fwd.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <vector>
#include <unordered_map>
#include "profiler/data_type_enum.hpp"
#include "profiler/profile_pool3d_fwd_impl.hpp"
#include "profiler_operation_registry.hpp"
using
ck
::
index_t
;
struct
poolFwdArgParser
{
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int
>>
long_opts
=
{{
"length"
,
{}},
{
"wsize"
,
{}},
{
"wstride"
,
{}},
{
"wdilation"
,
{}},
{
"pad1"
,
{}},
{
"pad2"
,
{}}};
bool
parse_opt
(
int
argc
,
char
*
argv
[],
const
std
::
string
&
key
,
int
i
)
{
if
(
std
::
string
(
"--"
)
+
key
==
argv
[
i
])
{
int
pos
=
i
;
while
(
++
i
<
argc
&&
argv
[
i
][
0
]
!=
'-'
)
{}
int
end
=
i
;
for
(
int
j
=
pos
+
1
;
j
<
end
;
j
++
)
{
long_opts
[
key
].
push_back
(
std
::
stoi
(
argv
[
j
]));
}
return
true
;
}
return
false
;
}
void
operator
()(
int
argc
,
char
*
argv
[])
{
for
(
auto
&
kv
:
long_opts
)
{
for
(
int
i
=
1
;
i
<
argc
;
i
++
)
{
if
(
parse_opt
(
argc
,
argv
,
kv
.
first
,
i
))
break
;
}
}
}
};
void
print_help_pool3d_fwd
()
{
std
::
cout
<<
"arg1: data type (0: fp16; 1: fp32; 3: int8; 5: bf16; 7: fp8)
\n
"
<<
"arg2: verification (0: no; 1: yes)
\n
"
<<
"arg3: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
<<
"arg4: print tensor value (0: no; 1: yes)
\n
"
<<
"arg5: time kernel (0=no, 1=yes)
\n
"
<<
"arg6: return index (0=no, 1=yes)
\n
"
<<
"arg7: reduce op (0: max; 1: avg)
\n
"
<<
"--length: input tensor length for NCDHW(e.g, --length 2 32 30 30 30)
\n
"
<<
"--wsize: window size for ZYX (e.g, --wsize 2 2 2)
\n
"
<<
"--wstride: window stride for DHW (e.g, --wstride 2 2 2)
\n
"
<<
"--wdilation: window dilation for DHW (e.g, --wdilation 1 1 1)
\n
"
<<
"--pad1: left side of padding in DHW (e.g, --pad1 1 1 1)
\n
"
<<
"--pad2: right side of padding in DHW (e.g, --pad2 1 1 1)
\n
"
<<
"eg: ckProfiler pool3d_fwd 0 1 2 0 1 0 --length 2 32 30 30 30 --wsize 2 2 2 "
"--wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1"
<<
std
::
endl
;
}
int
profile_pool3d_fwd
(
int
argc
,
char
*
argv
[])
{
ck
::
DataTypeEnum
data_type
=
ck
::
DataTypeEnum
::
Half
;
ck
::
profiler
::
PoolFwdInputParams
in_params
{
true
,
0
,
false
,
true
,
false
,
0
};
ck
::
profiler
::
PoolFwdKernelParams
kernel_params
{
{
2
,
32
,
30
,
30
,
30
},
{
2
,
2
,
2
},
{
2
,
2
,
2
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}};
if
(
argc
!=
2
&&
argc
!=
35
)
{
print_help_pool3d_fwd
();
return
0
;
}
else
if
(
argc
==
35
)
{
data_type
=
static_cast
<
ck
::
DataTypeEnum
>
(
std
::
stoi
(
argv
[
2
]));
in_params
.
do_verification
=
std
::
stoi
(
argv
[
3
]);
in_params
.
init_method
=
std
::
stoi
(
argv
[
4
]);
in_params
.
do_log
=
std
::
stoi
(
argv
[
5
]);
in_params
.
time_kernel
=
std
::
stoi
(
argv
[
6
]);
in_params
.
return_index
=
std
::
stoi
(
argv
[
7
]);
in_params
.
reduce_op
=
std
::
stoi
(
argv
[
8
]);
// parse the long options
poolFwdArgParser
arg_parser
;
arg_parser
(
argc
,
argv
);
kernel_params
.
in_length
=
arg_parser
.
long_opts
[
"length"
];
kernel_params
.
window_spatial_lengths
=
arg_parser
.
long_opts
[
"wsize"
];
kernel_params
.
window_strides
=
arg_parser
.
long_opts
[
"wstride"
];
kernel_params
.
window_dilations
=
arg_parser
.
long_opts
[
"wdilation"
];
kernel_params
.
input_left_pads
=
arg_parser
.
long_opts
[
"pad1"
];
kernel_params
.
input_right_pads
=
arg_parser
.
long_opts
[
"pad2"
];
}
using
F16
=
ck
::
half_t
;
using
BF16
=
ck
::
bhalf_t
;
using
F32
=
float
;
using
I8
=
int8_t
;
using
I32
=
int32_t
;
using
F8
=
ck
::
f8_t
;
using
NDHWC
=
ck
::
tensor_layout
::
convolution
::
NDHWC
;
if
(
data_type
==
ck
::
DataTypeEnum
::
Half
)
{
if
(
in_params
.
reduce_op
==
1
)
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F16
,
F16
,
F32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
AVG
,
false
,
false
>
(
in_params
,
kernel_params
);
}
else
{
// reduce_op == 0
if
(
in_params
.
return_index
)
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F16
,
F16
,
F16
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
true
>
(
in_params
,
kernel_params
);
}
else
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F16
,
F16
,
F16
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
false
>
(
in_params
,
kernel_params
);
}
}
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
BFloat16
)
{
if
(
in_params
.
reduce_op
==
1
)
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
BF16
,
BF16
,
F32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
AVG
,
false
,
false
>
(
in_params
,
kernel_params
);
}
else
{
// reduce_op == 0
if
(
in_params
.
return_index
)
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
BF16
,
BF16
,
BF16
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
true
>
(
in_params
,
kernel_params
);
}
else
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
BF16
,
BF16
,
BF16
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
false
>
(
in_params
,
kernel_params
);
}
}
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Float
)
{
if
(
in_params
.
reduce_op
==
1
)
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
AVG
,
false
,
false
>
(
in_params
,
kernel_params
);
}
else
{
// reduce_op == 0
if
(
in_params
.
return_index
)
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
true
>
(
in_params
,
kernel_params
);
}
else
{
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F32
,
F32
,
F32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
false
>
(
in_params
,
kernel_params
);
}
}
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Float8
)
{
if
(
in_params
.
reduce_op
==
1
)
{
return
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F8
,
F8
,
F32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
AVG
,
false
,
false
>
(
in_params
,
kernel_params
);
}
else
{
// reduce_op == 0
if
(
in_params
.
return_index
)
{
return
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F8
,
F8
,
F8
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
true
>
(
in_params
,
kernel_params
);
}
else
{
return
ck
::
profiler
::
profile_pool3d_fwd_impl
<
F8
,
F8
,
F8
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
false
>
(
in_params
,
kernel_params
);
}
}
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Int8
)
{
if
(
in_params
.
reduce_op
==
1
)
{
return
ck
::
profiler
::
profile_pool3d_fwd_impl
<
I8
,
I8
,
I32
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
AVG
,
false
,
false
>
(
in_params
,
kernel_params
);
}
else
{
// reduce_op == 0
if
(
in_params
.
return_index
)
{
return
ck
::
profiler
::
profile_pool3d_fwd_impl
<
I8
,
I8
,
I8
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
true
>
(
in_params
,
kernel_params
);
}
else
{
return
ck
::
profiler
::
profile_pool3d_fwd_impl
<
I8
,
I8
,
I8
,
I32
,
NDHWC
,
NDHWC
,
ck
::
ReduceTensorOp
::
MAX
,
false
,
false
>
(
in_params
,
kernel_params
);
}
}
}
else
{
throw
std
::
runtime_error
(
"not implemented yet"
);
}
return
0
;
}
REGISTER_PROFILER_OPERATION
(
"pool3d_fwd"
,
"pool3d fwd"
,
profile_pool3d_fwd
);
script/cmake-ck-dev.sh
View file @
09d4c3a4
...
...
@@ -7,8 +7,10 @@ MY_PROJECT_SOURCE=$1
if
[
$#
-ge
2
]
;
then
GPU_TARGETS
=
$2
REST_ARGS
=
${
@
:3
}
else
GPU_TARGETS
=
"gfx908;gfx90a;gfx940"
REST_ARGS
=
fi
cmake
\
...
...
@@ -20,4 +22,5 @@ cmake
-D
GPU_TARGETS
=
$GPU_TARGETS
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
USE_BITINT_EXTENSION_INT4
=
OFF
\
$REST_ARGS
\
${
MY_PROJECT_SOURCE
}
script/cmake-ck-release.sh
View file @
09d4c3a4
...
...
@@ -7,8 +7,10 @@ MY_PROJECT_SOURCE=$1
if
[
$#
-ge
2
]
;
then
GPU_TARGETS
=
$2
REST_ARGS
=
${
@
:3
}
else
GPU_TARGETS
=
"gfx908;gfx90a;gfx940"
REST_ARGS
=
fi
cmake
\
...
...
@@ -20,5 +22,6 @@ cmake
-D
GPU_TARGETS
=
$GPU_TARGETS
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
USE_BITINT_EXTENSION_INT4
=
OFF
\
$REST_ARGS
\
${
MY_PROJECT_SOURCE
}
script/convert_miopen_driver_to_profiler.py
View file @
09d4c3a4
...
...
@@ -28,6 +28,8 @@ def parse_layouts(args):
args
.
in_layout
==
"NCDHW"
:
if
args
.
ck_profier_op
==
"grouped_conv_bwd_weight"
:
args
.
layout
=
3
elif
args
.
ck_profier_op
==
"grouped_conv_fwd"
:
args
.
layout
=
2
else
:
print
(
'Not supported layout for this op'
)
exit
(
1
)
...
...
test/CMakeLists.txt
View file @
09d4c3a4
...
...
@@ -173,6 +173,7 @@ function(add_gtest_executable TEST_NAME)
endfunction
()
add_compile_options
(
-Wno-c++20-extensions
)
add_subdirectory
(
ck_tile
)
add_subdirectory
(
magic_number_division
)
add_subdirectory
(
space_filling_curve
)
add_subdirectory
(
conv_util
)
...
...
test/ck_tile/CMakeLists.txt
0 → 100644
View file @
09d4c3a4
add_subdirectory
(
image_to_column
)
test/ck_tile/image_to_column/CMakeLists.txt
0 → 100644
View file @
09d4c3a4
# Currently ck_tile is only built on gfx9
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_gtest_executable
(
test_tile_image_to_column test_tile_image_to_column.cpp
)
endif
()
test/ck_tile/image_to_column/test_tile_image_to_column.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm>
#include <gtest/gtest.h>
#include "ck_tile/host.hpp"
#include "ck_tile/core.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/image_to_column.hpp"
// Host API implementation
template
<
typename
DataType
>
class
TestCkTileImageToColumn
:
public
::
testing
::
Test
{
static
constexpr
ck_tile
::
index_t
VectorSize
=
1
;
static
constexpr
ck_tile
::
index_t
NDimSpatial
=
2
;
protected:
void
Run
(
const
ck_tile
::
conv
::
ConvParam
conv_params
)
{
using
ImLayout
=
ck_tile
::
tensor_layout
::
convolution
::
NHWGC
;
const
auto
G
=
conv_params
.
G_
;
const
auto
N
=
conv_params
.
N_
;
const
auto
C
=
conv_params
.
C_
;
const
ck_tile
::
long_index_t
NDoHoWo
=
N
*
std
::
accumulate
(
conv_params
.
output_spatial_lengths_
.
begin
(),
std
::
next
(
conv_params
.
output_spatial_lengths_
.
begin
(),
NDimSpatial
),
1
,
std
::
multiplies
<>
());
const
ck_tile
::
long_index_t
CZYX
=
C
*
std
::
accumulate
(
conv_params
.
filter_spatial_lengths_
.
begin
(),
std
::
next
(
conv_params
.
filter_spatial_lengths_
.
begin
(),
NDimSpatial
),
1
,
std
::
multiplies
<>
());
const
auto
in_desc
=
ck_tile
::
conv
::
make_input_host_tensor_descriptor_g_n_c_wis_packed
<
ImLayout
>
(
conv_params
);
const
auto
out_desc
=
ck_tile
::
HostTensorDescriptor
({
G
,
NDoHoWo
,
CZYX
});
// host verify
ck_tile
::
HostTensor
<
DataType
>
in
(
in_desc
);
ck_tile
::
HostTensor
<
DataType
>
out_device
(
out_desc
);
ck_tile
::
HostTensor
<
DataType
>
out_host
(
out_desc
);
std
::
cout
<<
"input: "
<<
in
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"output: "
<<
out_device
.
mDesc
<<
std
::
endl
;
ck_tile
::
FillUniformDistributionIntegerValue
<
DataType
>
{
-
5.
f
,
5.
f
}(
in
);
ck_tile
::
DeviceMem
in_device_buf
(
in
.
get_element_space_size_in_bytes
());
ck_tile
::
DeviceMem
out_device_buf
(
out_device
.
get_element_space_size_in_bytes
());
in_device_buf
.
ToDevice
(
in
.
data
());
using
thread_tile
=
ck_tile
::
sequence
<
4
,
4
>
;
using
warp_tile
=
ck_tile
::
sequence
<
8
,
128
>
;
using
block_tile
=
ck_tile
::
sequence
<
32
,
128
>
;
using
Shape
=
ck_tile
::
TileImageToColumnShape
<
thread_tile
,
warp_tile
,
block_tile
>
;
using
PipelineProblem
=
ck_tile
::
BlockImageToColumnProblem
<
DataType
,
DataType
,
Shape
,
NDimSpatial
,
VectorSize
,
VectorSize
>
;
using
Kernel
=
ck_tile
::
ImageToColumn
<
PipelineProblem
>
;
auto
kargs
=
Kernel
::
MakeKargs
(
in_device_buf
.
GetDeviceBuffer
(),
out_device_buf
.
GetDeviceBuffer
(),
G
,
N
,
C
,
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
input_spatial_lengths_
),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
filter_spatial_lengths_
),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
output_spatial_lengths_
),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
+
3
>
(
in_desc
.
get_strides
()),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
3
>
(
out_desc
.
get_strides
()),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
conv_filter_strides_
),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
conv_filter_dilations_
),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
input_left_pads_
),
ck_tile
::
to_array
<
ck_tile
::
long_index_t
,
NDimSpatial
>
(
conv_params
.
input_right_pads_
));
const
dim3
grids
=
Kernel
::
GridSize
(
kargs
.
N
*
kargs
.
output_spatial_lengths
[
0
]
*
kargs
.
output_spatial_lengths
[
1
],
kargs
.
filter_spatial_lengths
[
0
]
*
kargs
.
filter_spatial_lengths
[
1
]
*
kargs
.
C
,
kargs
.
G
);
constexpr
dim3
blocks
=
Kernel
::
BlockSize
();
constexpr
ck_tile
::
index_t
kBlockPerCu
=
2
;
ck_tile
::
launch_kernel
(
ck_tile
::
stream_config
{},
ck_tile
::
make_kernel
<
blocks
.
x
,
kBlockPerCu
>
(
Kernel
{},
grids
,
blocks
,
0
,
kargs
));
// reference
ck_tile
::
reference_im2col
<
DataType
,
DataType
,
NDimSpatial
>
(
in
,
out_host
,
conv_params
);
out_device_buf
.
FromDevice
(
out_device
.
data
());
bool
pass
=
ck_tile
::
check_err
(
out_device
,
out_host
);
EXPECT_TRUE
(
pass
);
}
};
class
TestCkTileImageToColumnFloat
:
public
TestCkTileImageToColumn
<
float
>
{
};
class
TestCkTileImageToColumnHalf
:
public
TestCkTileImageToColumn
<
ck_tile
::
half_t
>
{
};
TEST_F
(
TestCkTileImageToColumnFloat
,
TestCorrectness
)
{
this
->
Run
({
2
,
2
,
4
,
1
,
192
,
{
3
,
3
},
{
28
,
28
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
Run
({
2
,
2
,
64
,
1
,
64
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
Run
({
2
,
1
,
64
,
1
,
64
,
{
1
,
1
},
{
7
,
7
},
{
3
,
3
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
Run
({
2
,
1
,
64
,
1
,
64
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
Run
({
2
,
2
,
64
,
1
,
64
,
{
3
,
3
},
{
28
,
28
},
{
2
,
2
},
{
2
,
2
},
{
1
,
1
},
{
1
,
1
}});
}
TEST_F
(
TestCkTileImageToColumnHalf
,
TestCorrectness
)
{
this
->
Run
({
2
,
2
,
4
,
1
,
192
,
{
3
,
3
},
{
28
,
28
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
Run
({
2
,
2
,
64
,
1
,
64
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
Run
({
2
,
1
,
64
,
1
,
64
,
{
1
,
1
},
{
7
,
7
},
{
3
,
3
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
Run
({
2
,
1
,
64
,
1
,
64
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
Run
({
2
,
2
,
64
,
1
,
64
,
{
3
,
3
},
{
28
,
28
},
{
2
,
2
},
{
2
,
2
},
{
1
,
1
},
{
1
,
1
}});
}
test/gemm_universal/test_gemm_universal_ut_cases.inc
View file @
09d4c3a4
...
...
@@ -28,6 +28,38 @@ TYPED_TEST(TestGemmUniversal_MK_NK, SmallM)
this
->
Run
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
);
}
// Column-major A / row-major B: sweep very small M values to exercise the
// kernel's M-tail handling.
TYPED_TEST(TestGemmUniversal_KM_KN, SmallM)
{
    constexpr int N       = 512;
    constexpr int K       = 320;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    for(const int M : {1, 2, 3, 4, 5, 6})
    {
        // For a KM (column-major) A matrix the leading dimension equals M.
        this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
    }
}
// Column-major A / column-major B: sweep very small M values to exercise the
// kernel's M-tail handling.
TYPED_TEST(TestGemmUniversal_KM_NK, SmallM)
{
    constexpr int N       = 512;
    constexpr int K       = 320;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    for(const int M : {1, 2, 3, 4, 5, 6})
    {
        // For a KM (column-major) A matrix the leading dimension equals M.
        this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
    }
}
TYPED_TEST
(
TestGemmUniversal_MK_KN
,
MidLargeM
)
{
std
::
vector
<
int
>
Ms
{
127
,
255
,
312
,
799
,
1573
};
...
...
@@ -56,6 +88,38 @@ TYPED_TEST(TestGemmUniversal_MK_NK, MidLargeM)
this
->
Run
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
);
}
// Column-major A / row-major B: mid-size and large, mostly non-power-of-two M.
TYPED_TEST(TestGemmUniversal_KM_KN, MidLargeM)
{
    constexpr int N       = 512;
    constexpr int K       = 320;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    for(const int M : {127, 255, 312, 799, 1573})
    {
        // For a KM (column-major) A matrix the leading dimension equals M.
        this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
    }
}
// Column-major A / column-major B: mid-size and large, mostly non-power-of-two M.
TYPED_TEST(TestGemmUniversal_KM_NK, MidLargeM)
{
    constexpr int N       = 512;
    constexpr int K       = 320;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    for(const int M : {127, 255, 312, 799, 1573})
    {
        // For a KM (column-major) A matrix the leading dimension equals M.
        this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
    }
}
TYPED_TEST
(
TestGemmUniversal_MK_KN
,
PaddK
)
{
std
::
vector
<
int
>
Ms
{
127
};
...
...
@@ -84,6 +148,38 @@ TYPED_TEST(TestGemmUniversal_MK_NK, PaddK)
this
->
Run
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
);
}
// Column-major A / row-major B: K = 437 is not a multiple of the K-tile,
// forcing the K-padding path.
TYPED_TEST(TestGemmUniversal_KM_KN, PaddK)
{
    constexpr int N       = 512;
    constexpr int K       = 437;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    // Single M case; StrideA equals M for column-major A.
    const int M = 127;
    this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
}
// Column-major A / column-major B: K = 437 is not a multiple of the K-tile,
// forcing the K-padding path.
TYPED_TEST(TestGemmUniversal_KM_NK, PaddK)
{
    constexpr int N       = 512;
    constexpr int K       = 437;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    // Single M case; StrideA equals M for column-major A.
    const int M = 127;
    this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
}
TYPED_TEST
(
TestGemmUniversal_MK_KN
,
Regular
)
{
std
::
vector
<
int
>
Ms
{
512
};
...
...
@@ -111,3 +207,35 @@ TYPED_TEST(TestGemmUniversal_MK_NK, Regular)
for
(
int
M
:
Ms
)
this
->
Run
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
);
}
// Column-major A / row-major B: a regular, fully tile-aligned 512^3 problem.
TYPED_TEST(TestGemmUniversal_KM_KN, Regular)
{
    constexpr int N       = 512;
    constexpr int K       = 512;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    // Single M case; StrideA equals M for column-major A.
    const int M = 512;
    this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
}
// Column-major A / column-major B: a regular, fully tile-aligned 512^3 problem.
TYPED_TEST(TestGemmUniversal_KM_NK, Regular)
{
    constexpr int N       = 512;
    constexpr int K       = 512;
    constexpr int StrideB = N;
    constexpr int StrideC = N;

    // Single M case; StrideA equals M for column-major A.
    const int M = 512;
    this->Run(M, N, K, /*StrideA=*/M, StrideB, StrideC);
}
test/gemm_universal/test_gemm_universal_xdl.cpp
View file @
09d4c3a4
...
...
@@ -40,6 +40,18 @@ class TestGemmUniversal_MK_NK
{
};
// Layout-specific typed-test fixtures: the first two tuple slots fix the A/B
// memory layouts, the data types come from the Tuple bound via TYPED_TEST_SUITE.

// A: column-major (KM), B: row-major (KN).
template <typename Tuple>
class TestGemmUniversal_KM_KN
    : public ck::test::TestGemmUniversal<
          typename tuple_concat<std::tuple<Col, Row>, Tuple>::type>
{
};

// A: column-major (KM), B: column-major (NK).
template <typename Tuple>
class TestGemmUniversal_KM_NK
    : public ck::test::TestGemmUniversal<
          typename tuple_concat<std::tuple<Col, Col>, Tuple>::type>
{
};
// clang-format off
using
KernelTypes_MK_KN
=
::
testing
::
Types
<
// ADataType, BDataType, ComputeDataType, CDataType
...
...
@@ -61,9 +73,22 @@ using KernelTypes_MK_NK = ::testing::Types<
#endif
std
::
tuple
<
BF16
,
BF16
,
BF16
,
BF16
>
>
;
// Data-type combinations exercised for the KM_NK / KM_KN layout fixtures.
using KernelTypes_KM_NK = ::testing::Types<
    // ADataType, BDataType, ComputeDataType, CDataType
    std::tuple<BF16, BF16, BF16, BF16>>;

using KernelTypes_KM_KN = ::testing::Types<
    // ADataType, BDataType, ComputeDataType, CDataType
    std::tuple<BF16, BF16, BF16, BF16>>;
// clang-format on
// Bind each layout fixture to its type list; the shared test cases are pulled
// in afterwards from test_gemm_universal_ut_cases.inc.
TYPED_TEST_SUITE(TestGemmUniversal_MK_KN, KernelTypes_MK_KN);
TYPED_TEST_SUITE(TestGemmUniversal_MK_NK, KernelTypes_MK_NK);
TYPED_TEST_SUITE(TestGemmUniversal_KM_KN, KernelTypes_KM_KN);
TYPED_TEST_SUITE(TestGemmUniversal_KM_NK, KernelTypes_KM_NK);
#include "test_gemm_universal_ut_cases.inc"
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
View file @
09d4c3a4
...
...
@@ -62,7 +62,9 @@ using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWC, GKYXC, GNHWK>,
std
::
tuple
<
float
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
int8_t
,
NHWGC
,
GKYXC
,
NHWGK
>>
;
std
::
tuple
<
int8_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
float
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
ck
::
half_t
,
NGCHW
,
GKYXC
,
NGKHW
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
std
::
tuple
<
ck
::
half_t
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
...
...
test/pool/CMakeLists.txt
View file @
09d4c3a4
...
...
@@ -4,13 +4,25 @@ add_gtest_executable(test_avg_pool3d_bwd test_avg_pool3d_bwd.cpp)
# Pooling test executables (2-D and 3-D, forward and backward).
add_gtest_executable(test_max_pool3d_bwd test_max_pool3d_bwd.cpp)
add_gtest_executable(test_avg_pool3d_fwd test_avg_pool3d_fwd.cpp)
add_gtest_executable(test_max_pool3d_fwd test_max_pool3d_fwd.cpp)
add_gtest_executable(test_avg_pool2d_bwd test_avg_pool2d_bwd.cpp)
add_gtest_executable(test_max_pool2d_bwd test_max_pool2d_bwd.cpp)
add_gtest_executable(test_avg_pool2d_fwd test_avg_pool2d_fwd.cpp)
add_gtest_executable(test_max_pool2d_fwd test_max_pool2d_fwd.cpp)

# Link each test to the common utility library plus its device-instance library.
# Note: max-pool bwd shares one instance lib; fwd tests share a per-rank one.
target_link_libraries(test_avg_pool3d_bwd PRIVATE utility device_avg_pool3d_bwd_instance)
target_link_libraries(test_avg_pool2d_bwd PRIVATE utility device_avg_pool2d_bwd_instance)
target_link_libraries(test_max_pool2d_bwd PRIVATE utility device_max_pool_bwd_instance)
target_link_libraries(test_max_pool3d_bwd PRIVATE utility device_max_pool_bwd_instance)
target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
target_link_libraries(test_avg_pool2d_fwd PRIVATE utility device_pool2d_fwd_instance)
target_link_libraries(test_max_pool2d_fwd PRIVATE utility device_pool2d_fwd_instance)

# Aggregate all pooling tests under the umbrella `test_pool` target.
add_dependencies(test_pool test_avg_pool3d_bwd)
add_dependencies(test_pool test_max_pool3d_bwd)
add_dependencies(test_pool test_avg_pool3d_fwd)
add_dependencies(test_pool test_max_pool3d_fwd)
add_dependencies(test_pool test_avg_pool2d_bwd)
add_dependencies(test_pool test_max_pool2d_bwd)
add_dependencies(test_pool test_avg_pool2d_fwd)
add_dependencies(test_pool test_max_pool2d_fwd)
test/pool/test_avg_pool2d_bwd.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_avg_pool2d_bwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
// Typed fixture driving the 2-D average-pool backward profiler over a shared
// set of pooling configurations. T = std::tuple<InDataType, OutDataType>.
template <typename T>
class AvgPool2dBWDTest : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, T>;
    using OutDataType = std::tuple_element_t<1, T>;

    // Problem descriptors shared by all precisions (defined once per T below).
    static std::vector<PoolingParam> params;

    void Run()
    {
        // Iterate by const reference: PoolingParam holds several std::vectors,
        // so the original by-value loop copied them all on every iteration.
        for(const auto& param : this->params)
        {
            // Leading args: (do_verification=true, init_method=2, do_log=false,
            // time_kernel=false) -- per CK profiler convention; confirm against
            // the profile_avg_pool2d_bwd_impl signature.
            bool success =
                ck::profiler::profile_avg_pool2d_bwd_impl<InDataType, OutDataType, NHWC, NHWC>(
                    true,
                    2,
                    false,
                    false,
                    param.length_,
                    param.window_spatial_lengths_,
                    param.window_strides_,
                    param.window_dilations_,
                    param.input_left_pads_,
                    param.input_right_pads_);
            EXPECT_TRUE(success);
        }
    }
};
// Shared problem sweep. Per Run() usage, each row is
// {length_, window_spatial_lengths_, window_strides_, window_dilations_,
//  input_left_pads_, input_right_pads_}; length_ is presumably {N, C, H, W} -- confirm.
template <typename T>
std::vector<PoolingParam> AvgPool2dBWDTest<T>::params = {
    {{1, 1, 1, 1}, {1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},       // degenerate 1x1
    {{1, 1, 64, 64}, {64, 64}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},   // window == image
    {{1, 5, 7, 7}, {2, 2}, {2, 2}, {1, 1}, {2, 2}, {0, 0}},       // asymmetric padding
    {{1, 1, 8, 8}, {2, 2}, {2, 2}, {1, 1}, {2, 2}, {0, 0}},
    {{1, 1, 8, 8}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, {0, 0}},       // overlapping windows
    {{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}, {1, 1}},
    {{1, 2, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {0, 0}, {0, 0}}};
// One <InDataType, OutDataType> combination per tested precision.
using Avg_Pool_2D_f32_types  = ::testing::Types<std::tuple<F32, F32>>;
using Avg_Pool_2D_int8_types = ::testing::Types<std::tuple<I8, I8>>;
using Avg_Pool_2D_f16_types  = ::testing::Types<std::tuple<F16, F16>>;
using Avg_Pool_2D_bf16_types = ::testing::Types<std::tuple<BF16, BF16>>;
using Avg_Pool_2D_f8_types   = ::testing::Types<std::tuple<F8, F8>>;
// Per-precision fixtures: each SetUp() skips its whole suite when the
// corresponding CK_ENABLE_* data-type flag is disabled in this build.
template <typename TType>
class AvgPool2D_f32 : public AvgPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP32)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_f32 tests because CK_ENABLE_FP32 is not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_int8 : public AvgPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_INT8)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_int8 tests because CK_ENABLE_INT8 is not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_f16 : public AvgPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP16)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_f16 because CK_ENABLE_FP16 is not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_bf16 : public AvgPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_BF16)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_bf16 tests because CK_ENABLE_BF16 is not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_f8 : public AvgPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP8)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_f8 tests because CK_ENABLE_FP8 is not enabled";
        }
    }
};
// Register each fixture with its type list and run the shared parameter sweep.
TYPED_TEST_SUITE(AvgPool2D_f32, Avg_Pool_2D_f32_types);
TYPED_TEST_SUITE(AvgPool2D_int8, Avg_Pool_2D_int8_types);
TYPED_TEST_SUITE(AvgPool2D_f16, Avg_Pool_2D_f16_types);
TYPED_TEST_SUITE(AvgPool2D_bf16, Avg_Pool_2D_bf16_types);
TYPED_TEST_SUITE(AvgPool2D_f8, Avg_Pool_2D_f8_types);

TYPED_TEST(AvgPool2D_f32, AvgPool2DTest_f32) { this->Run(); }
TYPED_TEST(AvgPool2D_int8, AvgPool2DTest_int8) { this->Run(); }
TYPED_TEST(AvgPool2D_f16, AvgPool2DTest_f16) { this->Run(); }
TYPED_TEST(AvgPool2D_bf16, AvgPool2DTest_bf16) { this->Run(); }
TYPED_TEST(AvgPool2D_f8, AvgPool2DTest_f8) { this->Run(); }
test/pool/test_avg_pool2d_fwd.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
// Typed fixture running the 2-D average-pool forward profiler over the shared
// parameter sweep. Tuple = <InDataType, OutDataType, ComputeDataType, IndexDataType>.
template <typename Tuple>
class TestAvgPool2dFwd : public ::testing::Test
{
    protected:
    using InDataType      = std::tuple_element_t<0, Tuple>;
    using OutDataType     = std::tuple_element_t<1, Tuple>;
    using ComputeDataType = std::tuple_element_t<2, Tuple>;
    using IndexDataType   = std::tuple_element_t<3, Tuple>;

    static std::vector<PoolingParam> params;

    void Run()
    {
        // Iterate by const reference: PoolingParam owns several std::vectors,
        // so the original by-value loop copied them all on every iteration.
        for(const auto& param : params)
        {
            // Last two template flags are false/false (no index output for avg
            // pooling; first flag presumably NaN propagation -- confirm).
            // Leading call args: (do_verification=true, init_method=2,
            // do_log=false, time_kernel=false) -- per CK profiler convention.
            bool success =
                ck::profiler::profile_pool2d_fwd_impl<InDataType,
                                                      OutDataType,
                                                      ComputeDataType,
                                                      IndexDataType,
                                                      ck::tensor_layout::convolution::NHWC,
                                                      ck::tensor_layout::convolution::NHWC,
                                                      ck::ReduceTensorOp::AVG,
                                                      false,
                                                      false>(true,
                                                             2,
                                                             false,
                                                             false,
                                                             param.length_,
                                                             param.window_spatial_lengths_,
                                                             param.window_strides_,
                                                             param.window_dilations_,
                                                             param.input_left_pads_,
                                                             param.input_right_pads_);
            EXPECT_TRUE(success);
        }
    }
};
// Shared forward sweep. Per Run() usage, each row is
// {length_, window_spatial_lengths_, window_strides_, window_dilations_,
//  input_left_pads_, input_right_pads_}; length_ is presumably {N, C, H, W}.
// NOTE(review): the extra outer brace level is redundant (the inner list is
// forwarded to vector's initializer_list ctor) but harmless.
template <typename T>
std::vector<PoolingParam> TestAvgPool2dFwd<T>::params = {
    {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},      // degenerate 1x1
     {{2, 16, 64, 64}, {64, 64}, {1, 1}, {1, 1}, {0, 0}, {0, 0}}, // window == image
     {{2, 16, 64, 64}, {4, 4}, {4, 4}, {2, 2}, {0, 0}, {0, 0}},   // dilated window
     {{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}, {1, 1}}}};
// Tested <InDataType, OutDataType, ComputeDataType, IndexDataType> combinations.
// Each list previously contained the same tuple twice (copy-paste remnant of
// the max-pool true_t/false_t pattern) which just ran every test twice; the
// exact duplicates are removed.
using AvgPool2D_F32_Types = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
using AvgPool2D_F16_Types = ::testing::Types<std::tuple<F16, F16, F32, I32>>;
// Fixed: the first entry here was std::tuple<I8, I8, F32, I32> -- an I8 tuple
// in the BF16 list -- so BF16 was only exercised once and I8 twice.
using AvgPool2D_BF16_Types = ::testing::Types<std::tuple<BF16, BF16, F32, I32>>;
using AvgPool2D_I8_Types   = ::testing::Types<std::tuple<I8, I8, F32, I32>>;
using AvgPool2D_F8_Types   = ::testing::Types<std::tuple<F8, F8, F32, I32>>;
// Per-precision fixtures: each SetUp() skips its whole suite when the
// corresponding CK_ENABLE_* data-type flag is disabled in this build.
template <typename TType>
class AvgPool2D_F32 : public TestAvgPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP32)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_F32 tests because CK_ENABLE_FP32 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_F16 : public TestAvgPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP16)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_F16 tests because CK_ENABLE_FP16 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_BF16 : public TestAvgPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_BF16)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_BF16 tests because CK_ENABLE_BF16 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_I8 : public TestAvgPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_INT8)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_I8 tests because CK_ENABLE_INT8 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class AvgPool2D_F8 : public TestAvgPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP8)
        {
            GTEST_SKIP() << "Skipping AvgPool2D_F8 tests because CK_ENABLE_FP8 is "
                            "not enabled";
        }
    }
};
// Bind each per-precision fixture to its type list.
TYPED_TEST_SUITE(AvgPool2D_F32, AvgPool2D_F32_Types);
TYPED_TEST_SUITE(AvgPool2D_F16, AvgPool2D_F16_Types);
TYPED_TEST_SUITE(AvgPool2D_BF16, AvgPool2D_BF16_Types);
TYPED_TEST_SUITE(AvgPool2D_I8, AvgPool2D_I8_Types);
TYPED_TEST_SUITE(AvgPool2D_F8, AvgPool2D_F8_Types);
// Runs the shared sweep for fp32. Renamed from AvgPool2D_I8_Test: the test
// name said I8 but it lives in (and tests) the F32 suite -- copy-paste slip.
TYPED_TEST(AvgPool2D_F32, AvgPool2D_F32_Test) { this->Run(); }
// Each test runs the shared parameter sweep for its data type.
TYPED_TEST(AvgPool2D_F16, AvgPool2D_F16_Test) { this->Run(); }
TYPED_TEST(AvgPool2D_BF16, AvgPool2D_BF16_Test) { this->Run(); }
TYPED_TEST(AvgPool2D_I8, AvgPool2D_I8_Test) { this->Run(); }
TYPED_TEST(AvgPool2D_F8, AvgPool2D_F8_Test) { this->Run(); }
test/pool/test_avg_pool3d_fwd.cpp
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool3d_fwd_impl.hpp"
...
...
@@ -16,10 +16,19 @@ class TestAvgPool3dFwd : public ::testing::Test
std
::
vector
<
PoolingParam
>
params
;
ck
::
profiler
::
PoolFwdInputParams
in_params_avg_pool
{
true
,
2
,
false
,
false
,
false
,
1
};
void
Run
()
{
for
(
auto
param
:
params
)
{
ck
::
profiler
::
PoolFwdKernelParams
kernel_params
{
param
.
length_
,
param
.
window_spatial_lengths_
,
param
.
window_strides_
,
param
.
window_dilations_
,
param
.
input_left_pads_
,
param
.
input_right_pads_
};
bool
success
=
ck
::
profiler
::
profile_pool3d_fwd_impl
<
InDataType
,
OutDataType
,
...
...
@@ -29,26 +38,18 @@ class TestAvgPool3dFwd : public ::testing::Test
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
ReduceTensorOp
::
AVG
,
false
,
false
>
(
true
,
2
,
false
,
false
,
param
.
length_
,
param
.
window_spatial_lengths_
,
param
.
window_strides_
,
param
.
window_dilations_
,
param
.
input_left_pads_
,
param
.
input_right_pads_
);
false
>
(
in_params_avg_pool
,
kernel_params
);
EXPECT_TRUE
(
success
);
}
}
};
#ifdef CK_ENABLE_FP16
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
F16
,
F16
,
F32
,
I32
>
,
std
::
tuple
<
F32
,
F32
,
F32
,
I32
>>
;
#else
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
F32
,
F32
,
F32
,
I32
>>
;
#endif
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
I8
,
I8
,
I32
,
I32
>
,
std
::
tuple
<
F8
,
F8
,
F32
,
I32
>
,
std
::
tuple
<
F16
,
F16
,
F32
,
I32
>
,
std
::
tuple
<
BF16
,
BF16
,
F32
,
I32
>
,
std
::
tuple
<
F32
,
F32
,
F32
,
I32
>>
;
TYPED_TEST_SUITE
(
TestAvgPool3dFwd
,
KernelTypes
);
TYPED_TEST
(
TestAvgPool3dFwd
,
Test_Pool
)
{
...
...
test/pool/test_max_pool2d_bwd.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_max_pool2d_bwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
// Typed fixture driving the 2-D max-pool backward profiler.
// T = std::tuple<DOutDataType, DInDataType, IndexDataType>.
template <typename T>
class MaxPool2dBWDTest : public ::testing::Test
{
    protected:
    using DOutDataType  = std::tuple_element_t<0, T>;
    using DInDataType   = std::tuple_element_t<1, T>;
    using IndexDataType = std::tuple_element_t<2, T>;

    // Forward tensor types mirror the gradient types.
    using InDataType  = DInDataType;
    using OutDataType = DOutDataType;

    static std::vector<PoolingParam> params;

    void Run()
    {
        // Iterate by const reference: PoolingParam holds several std::vectors,
        // so the original by-value loop copied them all on every iteration.
        for(const auto& param : this->params)
        {
            // Leading args: (do_verification=true, init_method=2, do_log=false,
            // time_kernel=false) -- per CK profiler convention; confirm against
            // the profile_max_pool2d_bwd_impl signature.
            bool success = ck::profiler::profile_max_pool2d_bwd_impl<InDataType,
                                                                     OutDataType,
                                                                     IndexDataType,
                                                                     DOutDataType,
                                                                     DInDataType,
                                                                     false>(
                true,
                2,
                false,
                false,
                param.length_,
                param.window_spatial_lengths_,
                param.window_strides_,
                param.window_dilations_,
                param.input_left_pads_,
                param.input_right_pads_);
            EXPECT_TRUE(success);
        }
    }
};
// Shared problem sweep. Per Run() usage, each row is
// {length_, window_spatial_lengths_, window_strides_, window_dilations_,
//  input_left_pads_, input_right_pads_}; length_ is presumably {N, C, H, W}.
template <typename T>
std::vector<PoolingParam> MaxPool2dBWDTest<T>::params = {
    {{1, 1, 1, 1}, {1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},       // degenerate 1x1
    {{2, 16, 64, 64}, {64, 64}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},  // window == image
    {{2, 16, 64, 64}, {4, 4}, {4, 4}, {2, 2}, {0, 0}, {0, 0}},    // dilated window
    {{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}, {1, 1}},
    {{2, 2, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}, {1, 1}}};
// One <DOutDataType, DInDataType, IndexDataType> combination per precision.
using Max_Pool_2D_f32_types  = ::testing::Types<std::tuple<F32, F32, I32>>;
using Max_Pool_2D_int8_types = ::testing::Types<std::tuple<I8, I8, I32>>;
using Max_Pool_2D_f16_types  = ::testing::Types<std::tuple<F16, F16, I32>>;
using Max_Pool_2D_bf16_types = ::testing::Types<std::tuple<BF16, BF16, I32>>;
using Max_Pool_2D_f8_types   = ::testing::Types<std::tuple<F8, F8, I32>>;
// Per-precision fixtures: each SetUp() skips its whole suite when the
// corresponding CK_ENABLE_* data-type flag is disabled in this build.
template <typename TType>
class MaxPool2D_f32 : public MaxPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP32)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_f32 tests because CK_ENABLE_FP32 is not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_int8 : public MaxPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_INT8)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_int8 tests because CK_ENABLE_INT8 is not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_f16 : public MaxPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP16)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_f16 because CK_ENABLE_FP16 is not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_bf16 : public MaxPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_BF16)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_bf16 tests because CK_ENABLE_BF16 is not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_f8 : public MaxPool2dBWDTest<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP8)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_f8 tests because CK_ENABLE_FP8 is not enabled";
        }
    }
};
// Bind each per-precision fixture to its type list.
TYPED_TEST_SUITE(MaxPool2D_f32, Max_Pool_2D_f32_types);
TYPED_TEST_SUITE(MaxPool2D_int8, Max_Pool_2D_int8_types);
TYPED_TEST_SUITE(MaxPool2D_f16, Max_Pool_2D_f16_types);
TYPED_TEST_SUITE(MaxPool2D_bf16, Max_Pool_2D_bf16_types);
TYPED_TEST_SUITE(MaxPool2D_f8, Max_Pool_2D_f8_types);
// Each test runs the shared parameter sweep for its data type.
TYPED_TEST(MaxPool2D_f32, MaxPool2DTest_f32) { this->Run(); }
TYPED_TEST(MaxPool2D_int8, MaxPool2DTest_int8) { this->Run(); }
TYPED_TEST(MaxPool2D_f16, MaxPool2DTest_f16) { this->Run(); }
TYPED_TEST(MaxPool2D_bf16, MaxPool2DTest_bf16) { this->Run(); }
TYPED_TEST(MaxPool2D_f8, MaxPool2DTest_f8) { this->Run(); }
test/pool/test_max_pool2d_fwd.cpp
0 → 100644
View file @
09d4c3a4
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
// Typed fixture running the 2-D max-pool forward profiler. Tuple =
// <InDataType, OutDataType, ComputeDataType, IndexDataType, ReturnIndex-flag>,
// where the fifth element is an integral_constant selecting whether the
// kernel also outputs the argmax index tensor.
template <typename Tuple>
class TestMaxPool2dFwd : public ::testing::Test
{
    protected:
    using InDataType      = std::tuple_element_t<0, Tuple>;
    using OutDataType     = std::tuple_element_t<1, Tuple>;
    using ComputeDataType = std::tuple_element_t<2, Tuple>;
    using IndexDataType   = std::tuple_element_t<3, Tuple>;

    static constexpr bool ReturnIndex = std::tuple_element_t<4, Tuple>::value;

    static std::vector<PoolingParam> params;

    void Run()
    {
        // Iterate by const reference: PoolingParam owns several std::vectors,
        // so the original by-value loop copied them all on every iteration.
        for(const auto& param : params)
        {
            // Template flags after the reduce op: false (presumably NaN
            // propagation -- confirm) and ReturnIndex. Leading call args:
            // (do_verification=true, init_method=2, do_log=false,
            // time_kernel=false) -- per CK profiler convention.
            bool success =
                ck::profiler::profile_pool2d_fwd_impl<InDataType,
                                                      OutDataType,
                                                      ComputeDataType,
                                                      IndexDataType,
                                                      ck::tensor_layout::convolution::NHWC,
                                                      ck::tensor_layout::convolution::NHWC,
                                                      ck::ReduceTensorOp::MAX,
                                                      false,
                                                      ReturnIndex>(true,
                                                                   2,
                                                                   false,
                                                                   false,
                                                                   param.length_,
                                                                   param.window_spatial_lengths_,
                                                                   param.window_strides_,
                                                                   param.window_dilations_,
                                                                   param.input_left_pads_,
                                                                   param.input_right_pads_);
            EXPECT_TRUE(success);
        }
    }
};
// Shared forward sweep. Per Run() usage, each row is
// {length_, window_spatial_lengths_, window_strides_, window_dilations_,
//  input_left_pads_, input_right_pads_}; length_ is presumably {N, C, H, W}.
// NOTE(review): the extra outer brace level is redundant (the inner list is
// forwarded to vector's initializer_list ctor) but harmless.
template <typename T>
std::vector<PoolingParam> TestMaxPool2dFwd<T>::params = {
    {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},      // degenerate 1x1
     {{2, 16, 64, 64}, {64, 64}, {1, 1}, {1, 1}, {0, 0}, {0, 0}}, // window == image
     {{2, 16, 64, 64}, {4, 4}, {4, 4}, {2, 2}, {0, 0}, {0, 0}},   // dilated window
     {{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}, {1, 1}}}};
// Compile-time booleans selecting whether the kernel also returns the argmax
// index tensor (5th element of each test tuple). std::true_type/false_type
// are exactly std::integral_constant<bool, true/false>, so these aliases name
// the same types as the original spelled-out form.
using true_t  = std::true_type;
using false_t = std::false_type;
// Tested <InDataType, OutDataType, ComputeDataType, IndexDataType, ReturnIndex>
// combinations: each precision runs once with and once without index output.
using MaxPool2D_F32_Types = ::testing::Types<std::tuple<F32, F32, F32, I32, true_t>,
                                             std::tuple<F32, F32, F32, I32, false_t>>;
using MaxPool2D_F16_Types = ::testing::Types<std::tuple<F16, F16, F32, I32, true_t>,
                                             std::tuple<F16, F16, F32, I32, false_t>>;
// Fixed: the true_t entry here was std::tuple<I8, I8, F32, I32, true_t> -- an
// I8 tuple in the BF16 list -- so BF16 never ran with index output enabled.
using MaxPool2D_BF16_Types = ::testing::Types<std::tuple<BF16, BF16, F32, I32, true_t>,
                                              std::tuple<BF16, BF16, F32, I32, false_t>>;
using MaxPool2D_I8_Types = ::testing::Types<std::tuple<I8, I8, F32, I32, true_t>,
                                            std::tuple<I8, I8, F32, I32, false_t>>;
using MaxPool2D_F8_Types = ::testing::Types<std::tuple<F8, F8, F32, I32, true_t>,
                                            std::tuple<F8, F8, F32, I32, false_t>>;
// Per-precision fixtures: each SetUp() skips its whole suite when the
// corresponding CK_ENABLE_* data-type flag is disabled in this build.
template <typename TType>
class MaxPool2D_F32 : public TestMaxPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP32)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_F32 tests because CK_ENABLE_FP32 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_F16 : public TestMaxPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP16)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_F16 tests because CK_ENABLE_FP16 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_BF16 : public TestMaxPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_BF16)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_BF16 tests because CK_ENABLE_BF16 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_I8 : public TestMaxPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_INT8)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_I8 tests because CK_ENABLE_INT8 is "
                            "not enabled";
        }
    }
};

template <typename TType>
class MaxPool2D_F8 : public TestMaxPool2dFwd<TType>
{
    protected:
    void SetUp() override
    {
        if(!CK_ENABLE_FP8)
        {
            GTEST_SKIP() << "Skipping MaxPool2D_F8 tests because CK_ENABLE_FP8 is "
                            "not enabled";
        }
    }
};
// Bind each per-precision fixture to its type list.
TYPED_TEST_SUITE(MaxPool2D_F32, MaxPool2D_F32_Types);
TYPED_TEST_SUITE(MaxPool2D_F16, MaxPool2D_F16_Types);
TYPED_TEST_SUITE(MaxPool2D_BF16, MaxPool2D_BF16_Types);
TYPED_TEST_SUITE(MaxPool2D_I8, MaxPool2D_I8_Types);
TYPED_TEST_SUITE(MaxPool2D_F8, MaxPool2D_F8_Types);
// Runs the shared sweep for fp32. Renamed from MaxPool2D_I8_Test: the test
// name said I8 but it lives in (and tests) the F32 suite -- copy-paste slip.
TYPED_TEST(MaxPool2D_F32, MaxPool2D_F32_Test) { this->Run(); }
// Each test runs the shared parameter sweep for its data type.
TYPED_TEST(MaxPool2D_F16, MaxPool2D_F16_Test) { this->Run(); }
TYPED_TEST(MaxPool2D_BF16, MaxPool2D_BF16_Test) { this->Run(); }
TYPED_TEST(MaxPool2D_I8, MaxPool2D_I8_Test) { this->Run(); }
TYPED_TEST(MaxPool2D_F8, MaxPool2D_F8_Test) { this->Run(); }
Prev
1
…
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment