Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0a0c9527
"vscode:/vscode.git/clone" did not exist on "55c8486ee3313df6ca479143bb962c2138398fc3"
Commit
0a0c9527
authored
Jul 14, 2022
by
Chao Liu
Browse files
update include path
parent
615e1d3e
Changes
86
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
69 additions
and
432 deletions
+69
-432
profiler/include/profile_gemm_add_add_fastgelu_impl.hpp
profiler/include/profile_gemm_add_add_fastgelu_impl.hpp
+3
-3
profiler/include/profile_gemm_bias_add_reduce_impl.hpp
profiler/include/profile_gemm_bias_add_reduce_impl.hpp
+3
-3
profiler/include/profile_gemm_bilinear_impl.hpp
profiler/include/profile_gemm_bilinear_impl.hpp
+3
-3
profiler/include/profile_gemm_impl.hpp
profiler/include/profile_gemm_impl.hpp
+3
-3
profiler/include/profile_gemm_reduce_impl.hpp
profiler/include/profile_gemm_reduce_impl.hpp
+3
-3
profiler/include/profile_gemm_splitk_impl.hpp
profiler/include/profile_gemm_splitk_impl.hpp
+3
-3
profiler/include/profile_grouped_gemm_impl.hpp
profiler/include/profile_grouped_gemm_impl.hpp
+3
-3
profiler/include/profile_normalization_impl.hpp
profiler/include/profile_normalization_impl.hpp
+3
-3
profiler/include/profile_reduce_impl.hpp
profiler/include/profile_reduce_impl.hpp
+4
-4
profiler/src/profile_convnd_fwd.cpp
profiler/src/profile_convnd_fwd.cpp
+0
-359
profiler/src/profile_reduce.cpp
profiler/src/profile_reduce.cpp
+1
-1
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+17
-21
test/convnd_fwd/conv_util.hpp
test/convnd_fwd/conv_util.hpp
+1
-1
test/gemm/gemm_bf16.cpp
test/gemm/gemm_bf16.cpp
+3
-3
test/gemm/gemm_fp16.cpp
test/gemm/gemm_fp16.cpp
+3
-3
test/gemm/gemm_fp32.cpp
test/gemm/gemm_fp32.cpp
+3
-3
test/gemm/gemm_fp64.cpp
test/gemm/gemm_fp64.cpp
+3
-3
test/gemm/gemm_int8.cpp
test/gemm/gemm_int8.cpp
+3
-3
test/gemm/gemm_util.hpp
test/gemm/gemm_util.hpp
+3
-3
test/gemm_split_k/gemm_split_k.cpp
test/gemm_split_k/gemm_split_k.cpp
+4
-4
No files found.
profiler/include/profile_gemm_add_add_fastgelu_impl.hpp
View file @
0a0c9527
...
...
@@ -13,9 +13,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_gemm_bias_add_reduce_impl.hpp
View file @
0a0c9527
...
...
@@ -11,9 +11,9 @@
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_gemm_bilinear_impl.hpp
View file @
0a0c9527
...
...
@@ -13,9 +13,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_gemm_impl.hpp
View file @
0a0c9527
...
...
@@ -15,9 +15,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_gemm_reduce_impl.hpp
View file @
0a0c9527
...
...
@@ -11,9 +11,9 @@
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_gemm_splitk_impl.hpp
View file @
0a0c9527
...
...
@@ -15,9 +15,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_grouped_gemm_impl.hpp
View file @
0a0c9527
...
...
@@ -12,9 +12,9 @@
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
profiler/include/profile_normalization_impl.hpp
View file @
0a0c9527
...
...
@@ -10,9 +10,9 @@
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
namespace
ck
{
...
...
profiler/include/profile_reduce_impl.hpp
View file @
0a0c9527
...
...
@@ -8,10 +8,10 @@
#include "ck/library/utility/check_err.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_reduction.hpp"
#include "ck/library/
host_tensor
/host_common_util.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_reduction.hpp"
#include "ck/library/
utility
/host_common_util.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
profiler/src/profile_convnd_fwd.cpp
deleted
100644 → 0
View file @
615e1d3e
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/utility/fill.hpp"
namespace
{
// Selects the element type used for the input, weight and output tensors.
// The numeric values are the ones accepted for "arg2" on the command line.
enum class ConvDataType
{
    F32_F32_F32    = 0,
    F16_F16_F16    = 1,
    BF16_BF16_BF16 = 2,
    INT8_INT8_INT8 = 3,
};
// Selects the tensor layout family.
// The numeric values are the ones accepted for "arg3" on the command line.
enum class ConvDataLayout
{
    NCHW = 0,
    NHWC = 1,
};
namespace
ctl
=
ck
::
tensor_layout
::
convolution
;
// Compile-time lookup from (number of spatial dimensions, layout family) to the
// layout tag types used for the input, weight and output tensors.
// Only the specializations listed below are defined; any other combination
// leaves the primary template incomplete and fails to compile when used.
template <int NDim, ConvDataLayout DataLayout>
struct ConvolutionLayouts;

// 1 spatial dimension, NHWC family.
template <>
struct ConvolutionLayouts<1, ConvDataLayout::NHWC>
{
    using Input  = ctl::NWC;
    using Weight = ctl::KXC;
    using Output = ctl::NWK;
};

// 2 spatial dimensions, NHWC family.
template <>
struct ConvolutionLayouts<2, ConvDataLayout::NHWC>
{
    using Input  = ctl::NHWC;
    using Weight = ctl::KYXC;
    using Output = ctl::NHWK;
};

// 3 spatial dimensions, NHWC family.
template <>
struct ConvolutionLayouts<3, ConvDataLayout::NHWC>
{
    using Input  = ctl::NDHWC;
    using Weight = ctl::KZYXC;
    using Output = ctl::NDHWK;
};

// 1 spatial dimension, NCHW family.
template <>
struct ConvolutionLayouts<1, ConvDataLayout::NCHW>
{
    using Input  = ctl::NCW;
    using Weight = ctl::KCX;
    using Output = ctl::NKW;
};

// 2 spatial dimensions, NCHW family.
template <>
struct ConvolutionLayouts<2, ConvDataLayout::NCHW>
{
    using Input  = ctl::NCHW;
    using Weight = ctl::KCYX;
    using Output = ctl::NKHW;
};

// 3 spatial dimensions, NCHW family.
template <>
struct ConvolutionLayouts<3, ConvDataLayout::NCHW>
{
    using Input  = ctl::NCDHW;
    using Weight = ctl::KCZYX;
    using Output = ctl::NKDHW;
};
// Prints the command-line usage of the N-dimensional forward-convolution
// profiler to stdout. The "argN" numbering corresponds to the argv index the
// profiler entry point reads each option from.
void print_use_msg()
{
    std::cout << "arg1: tensor operation (conv_fwd: ForwardConvolution)\n"
              << "arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n"
              << "arg3: data layout (0: NCHW; 1: NHWC)\n"
              << "arg4: verification (0=no, 1=yes)\n"
              << "arg5: initialization (0=no init, 1=integer value, 2=decimal value)\n"
              << "arg6: print tensor value (0: no; 1: yes)\n"
              // argv[7] is parsed into the boolean `time_kernel`, not a repeat count.
              << "arg7: time kernel (0=no, 1=yes)\n"
              << "arg8: N spatial dimensions (default 2)\n"
              << "Following arguments (depending on number of spatial dims):\n"
              << " N, K, C,\n"
              << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
              << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
              << " <strides>, (ie Sy, Sx for 2D)\n"
              << " <dilations>, (ie Dy, Dx for 2D)\n"
              << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
              << " <right padding>, (ie RightPy, RightPx for 2D)\n"
              << std::endl; // flush: the caller terminates the process right after printing
}
// Checks that the command line carries exactly the number of arguments needed
// to describe a convolution with `num_dim_spatial` spatial dimensions, then
// delegates to ck::utils::conv::parse_conv_params.
//
// On an argument-count mismatch the usage text is printed and the process
// exits with status 1.
ck::utils::conv::ConvParams parse_params(int num_dim_spatial, int argc, char* argv[])
{
    // Index of the first convolution-specific argument in argv.
    int first_conv_arg = 9;

    // N, K, C plus six per-dimension groups:
    // filter, input, strides, dilations, left padding, right padding.
    const int conv_arg_count = 3 + 6 * num_dim_spatial;
    const int expected_argc  = conv_arg_count + 9;

    if(argc != expected_argc)
    {
        print_use_msg();
        exit(1);
    }

    return ck::utils::conv::parse_conv_params(num_dim_spatial, first_conv_arg, argv);
}
// Builds a forward-convolution op instance for the given data types and layout
// bundle, profiles every available device instance through
// OpInstanceRunEngine, and prints the best configuration to stdout.
//
// init_method selects how the op instance is constructed:
//   0 - constructed with (params, false) and the default fill functors,
//   1 - constructed with FillUniformDistributionIntegerValue<int> for both fills,
//   2 - constructed with FillUniformDistribution of the input / weight types.
// Any other value throws std::runtime_error.
//
// The trailing unnamed ConvLayouts parameter only carries the layout types.
template <int NDim,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename ConvLayouts>
void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
                                   bool do_verification,
                                   bool do_log,
                                   bool time_kernel,
                                   int init_method,
                                   ConvLayouts)
{
    using namespace std::placeholders;
    using namespace ck::utils;

    using InLayout    = typename ConvLayouts::Input;
    using WeiLayout   = typename ConvLayouts::Weight;
    using OutLayout   = typename ConvLayouts::Output;
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;

    std::unique_ptr<OpInstance<OutDataType, InDataType, WeiDataType>> conv_instance;

    if(init_method == 0)
    {
        conv_instance = std::make_unique<conv::ConvFwdOpInstance<InDataType,
                                                                 WeiDataType,
                                                                 OutDataType,
                                                                 InLayout,
                                                                 WeiLayout,
                                                                 OutLayout>>(params, false);
    }
    else if(init_method == 1)
    {
        using IntegerFill = ck::utils::FillUniformDistributionIntegerValue<int>;

        conv_instance = std::make_unique<conv::ConvFwdOpInstance<InDataType,
                                                                 WeiDataType,
                                                                 OutDataType,
                                                                 InLayout,
                                                                 WeiLayout,
                                                                 OutLayout,
                                                                 PassThrough,
                                                                 PassThrough,
                                                                 PassThrough,
                                                                 IntegerFill,
                                                                 IntegerFill>>(
            params, true, IntegerFill{}, IntegerFill{});
    }
    else if(init_method == 2)
    {
        using InputFill  = ck::utils::FillUniformDistribution<InDataType>;
        using WeightFill = ck::utils::FillUniformDistribution<WeiDataType>;

        conv_instance = std::make_unique<conv::ConvFwdOpInstance<InDataType,
                                                                 WeiDataType,
                                                                 OutDataType,
                                                                 InLayout,
                                                                 WeiLayout,
                                                                 OutLayout,
                                                                 PassThrough,
                                                                 PassThrough,
                                                                 PassThrough,
                                                                 InputFill,
                                                                 WeightFill>>(
            params, true, InputFill{}, WeightFill{});
    }
    else
    {
        throw std::runtime_error("Unsupported init method!");
    }

    // Host reference implementation with `params` bound; the remaining three
    // arguments are supplied by the run engine.
    auto reference_conv_fwd_fun = std::bind(
        conv::run_reference_convolution_forward<NDim, InDataType, WeiDataType, OutDataType>,
        params,
        _1,
        _2,
        _3);

    OpInstanceRunEngine<InDataType, WeiDataType, OutDataType> run_engine(
        *conv_instance, reference_conv_fwd_fun, do_verification);

    auto best_conf = run_engine.Profile(
        conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(),
        time_kernel,
        do_verification,
        do_log);

    std::cout << "Best configuration parameters:"
              << "\nname: " << best_conf.best_op_name
              << "\navg_time: " << best_conf.best_avg_time
              << "\ntflops: " << best_conf.best_tflops
              << "\nGB/s: " << best_conf.best_gb_per_sec << std::endl;
}
// Dispatches the runtime (data_type, data_layout) selection to the matching
// profile_convnd_instances_impl instantiation for NDim spatial dimensions.
//
// data_type / data_layout originate from integers cast to the enum types, so
// they may hold values that match no enumerator. Such values throw
// std::runtime_error instead of silently profiling nothing.
//
// The remaining arguments are forwarded unchanged to the implementation.
template <int NDim>
void profile_convnd_instances(ConvDataType data_type,
                              ConvDataLayout data_layout,
                              const ck::utils::conv::ConvParams& params,
                              bool do_verification,
                              bool do_log,
                              bool time_kernel,
                              int init_method)
{
    // Picks the element types for an already chosen layout bundle. `layouts`
    // is an empty tag object; only its type is used by the implementation.
    const auto run_for_layouts = [&](auto layouts) {
        switch(data_type)
        {
        case ConvDataType::F32_F32_F32:
            profile_convnd_instances_impl<NDim, float, float, float>(
                params, do_verification, do_log, time_kernel, init_method, layouts);
            break;
        case ConvDataType::F16_F16_F16:
            profile_convnd_instances_impl<NDim, ck::half_t, ck::half_t, ck::half_t>(
                params, do_verification, do_log, time_kernel, init_method, layouts);
            break;
        case ConvDataType::BF16_BF16_BF16:
            profile_convnd_instances_impl<NDim, ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>(
                params, do_verification, do_log, time_kernel, init_method, layouts);
            break;
        case ConvDataType::INT8_INT8_INT8:
            profile_convnd_instances_impl<NDim, int8_t, int8_t, int8_t>(
                params, do_verification, do_log, time_kernel, init_method, layouts);
            break;
        default:
            throw std::runtime_error("profile_convnd_fwd: unsupported data type value: " +
                                     std::to_string(static_cast<int>(data_type)));
        }
    };

    switch(data_layout)
    {
    case ConvDataLayout::NHWC:
        run_for_layouts(ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
        break;
    case ConvDataLayout::NCHW:
        run_for_layouts(ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
        break;
    default:
        throw std::runtime_error("profile_convnd_fwd: unsupported data layout value: " +
                                 std::to_string(static_cast<int>(data_layout)));
    }
}
}
// namespace
// Entry point of the N-dimensional forward-convolution profiler.
//
// Reads the options from argv (see print_use_msg for the layout), falling back
// to built-in defaults for anything not supplied, then runs the profiler for
// 1, 2 or 3 spatial dimensions.
//
// Returns 0 on completion. Throws std::runtime_error when the number of
// spatial dimensions is not 1, 2 or 3.
int profile_convnd_fwd(int argc, char* argv[])
{
    using namespace ck::utils::conv;

    // Defaults used when the corresponding arguments are absent.
    ConvDataType data_type     = ConvDataType::F32_F32_F32;
    ConvDataLayout data_layout = ConvDataLayout::NHWC;
    bool do_verification       = true;
    int init_method            = 2;
    bool do_log                = false;
    bool time_kernel           = false;
    int num_dim_spatial        = 2;

    ConvParams params;

    // argv[2], argv[3]: data type and layout.
    if(argc >= 4)
    {
        const int type_id   = std::stoi(argv[2]);
        const int layout_id = std::stoi(argv[3]);

        data_type   = static_cast<ConvDataType>(type_id);
        data_layout = static_cast<ConvDataLayout>(layout_id);
    }

    // argv[4]..argv[8]: run options.
    if(argc >= 9)
    {
        do_verification = std::stoi(argv[4]);
        init_method     = std::stoi(argv[5]);
        do_log          = std::stoi(argv[6]);
        time_kernel     = std::stoi(argv[7]);
        num_dim_spatial = std::stoi(argv[8]);
    }

    // argv[9] onwards: the convolution problem description.
    if(argc >= 10)
    {
        params = parse_params(num_dim_spatial, argc, argv);
    }

    // TODO Print nice message what is being profiled.
    switch(num_dim_spatial)
    {
    case 1:
        profile_convnd_instances<1>(
            data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
        break;
    case 2:
        profile_convnd_instances<2>(
            data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
        break;
    case 3:
        profile_convnd_instances<3>(
            data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
        break;
    default:
        throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " +
                                 std::to_string(num_dim_spatial));
    }

    return 0;
}
profiler/src/profile_reduce.cpp
View file @
0a0c9527
...
...
@@ -11,7 +11,7 @@
#include "ck/utility/reduction_enums.hpp"
#include "ck/library/
host_tensor
/host_common_util.hpp"
#include "ck/library/
utility
/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
#include "profiler/include/data_type_enum.hpp"
...
...
profiler/src/profiler.cpp
View file @
0a0c9527
...
...
@@ -3,23 +3,23 @@
#include <cstring>
//
int profile_gemm(int, char*[]);
//
int profile_gemm_splitk(int, char*[]);
//
int profile_gemm_bilinear(int, char*[]);
//
int profile_gemm_add_add_fastgelu(int, char*[]);
//
int profile_gemm_reduce(int, char*[]);
//
int profile_gemm_bias_add_reduce(int, char*[]);
//
int profile_batched_gemm(int, char*[]);
//
int profile_batched_gemm_reduce(int, char*[]);
//
int profile_grouped_gemm(int, char*[]);
int
profile_gemm
(
int
,
char
*
[]);
int
profile_gemm_splitk
(
int
,
char
*
[]);
int
profile_gemm_bilinear
(
int
,
char
*
[]);
int
profile_gemm_add_add_fastgelu
(
int
,
char
*
[]);
int
profile_gemm_reduce
(
int
,
char
*
[]);
int
profile_gemm_bias_add_reduce
(
int
,
char
*
[]);
int
profile_batched_gemm
(
int
,
char
*
[]);
int
profile_batched_gemm_reduce
(
int
,
char
*
[]);
int
profile_grouped_gemm
(
int
,
char
*
[]);
int
profile_conv_fwd
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu(int, char*[]);
//
int profile_conv_fwd_bias_relu_add(int, char*[]);
//
int profile_convnd_bwd_data(int, char*[], int);
//
int profile_conv_bwd_weight(int, char*[]);
//
int profile_
normalization
(int, char*[]);
//
int profile_
reduce
(int, char*[]);
//
int profile_
convnd_bwd_weight
(int, char*[]
, int
);
int
profile_conv_fwd_bias_relu
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_add
(
int
,
char
*
[]);
int
profile_convnd_bwd_data
(
int
,
char
*
[],
int
);
int
profile_conv_bwd_weight
(
int
,
char
*
[]);
int
profile_
convnd_bwd_weight
(
int
,
char
*
[]
,
int
);
int
profile_
normalization
(
int
,
char
*
[]);
int
profile_
reduce
(
int
,
char
*
[]);
static
void
print_helper_message
()
{
...
...
@@ -53,7 +53,6 @@ int main(int argc, char* argv[])
return
0
;
}
#if 0
if
(
strcmp
(
argv
[
1
],
"gemm"
)
==
0
)
{
return
profile_gemm
(
argc
,
argv
);
...
...
@@ -90,12 +89,10 @@ int main(int argc, char* argv[])
{
return
profile_grouped_gemm
(
argc
,
argv
);
}
#endif
if
(
strcmp
(
argv
[
1
],
"conv_fwd"
)
==
0
)
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd"
)
==
0
)
{
return
profile_conv_fwd
(
argc
,
argv
);
}
#if 0
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu"
)
==
0
)
{
return
profile_conv_fwd_bias_relu
(
argc
,
argv
);
...
...
@@ -141,7 +138,6 @@ int main(int argc, char* argv[])
{
return
profile_normalization
(
argc
,
argv
);
}
#endif
else
{
print_helper_message
();
...
...
test/convnd_fwd/conv_util.hpp
View file @
0a0c9527
...
...
@@ -10,7 +10,7 @@
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
test/gemm/gemm_bf16.cpp
View file @
0a0c9527
...
...
@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"
...
...
test/gemm/gemm_fp16.cpp
View file @
0a0c9527
...
...
@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"
...
...
test/gemm/gemm_fp32.cpp
View file @
0a0c9527
...
...
@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"
...
...
test/gemm/gemm_fp64.cpp
View file @
0a0c9527
...
...
@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"
...
...
test/gemm/gemm_int8.cpp
View file @
0a0c9527
...
...
@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"
...
...
test/gemm/gemm_util.hpp
View file @
0a0c9527
...
...
@@ -6,9 +6,9 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace
ck
{
...
...
test/gemm_split_k/gemm_split_k.cpp
View file @
0a0c9527
...
...
@@ -14,12 +14,12 @@
#include "ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/
host_tensor
/host_gemm.hpp"
#include "ck/library/
utility
/host_gemm.hpp"
enum
struct
GemmMatrixLayout
{
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment