gaoqiong / composable_kernel / Commits / b054669b
"src/scanscalar.cpp" did not exist on "ba132b01bc9d637e800723b5303c0dbfa0de7277"
Commit b054669b, authored Jul 14, 2022 by Chao Liu (parent 6b6360b1)

update profiler for conv bwd data and weight
Showing 5 changed files with 250 additions and 633 deletions:

  profiler/src/profile_conv_bwd_weight.cpp     +203  -112
  profiler/src/profile_conv_fwd.cpp             +40   -36
  profiler/src/profile_convnd_bwd_data.cpp       +0  -229
  profiler/src/profile_convnd_bwd_weight.cpp     +0  -226
  profiler/src/profiler.cpp                      +7   -30
profiler/src/profile_conv_bwd_weight.cpp

@@ -8,141 +8,232 @@
 #include "profiler/include/profile_conv_bwd_weight_impl.hpp"

+namespace {
+
+enum struct ConvLayout
+{
+    NCHW_KYXC_NKHW, // 0
+    NHWC_KYXC_NHWK, // 1
+};
+
 enum struct ConvDataType
 {
     F32_F32_F32,    // 0
     F16_F16_F16,    // 1
     BF16_BF16_BF16, // 2
+    INT8_INT8_INT8, // 3
 };

-enum struct ConvInputLayout
-{
-    NCHW, // 0
-    NHWC, // 1
-};
-
-enum struct ConvWeightLayout
-{
-    KCYX, // 0
-    KYXC, // 1
-};
-
-enum struct ConvOutputLayout
-{
-    NKHW, // 0
-    NHWK, // 1
-};
+static void print_helper_msg()
+{
+    // clang-format-off
+    std::cout << "arg1: tensor operation (conv_bww: ConvolutionBackwardWeight, Input * d_Output = d_Weight)\n"
+              << "arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n"
+              << "arg3: tensor layout (0: Input[N, C, Hi, Wi] * d_Output[N, K, Ho, Wo] = d_Weight[K, C, Y, X]\n"
+              << "                     1: Input[N, Hi, Wi, C] * d_Output[N, Ho, Wo, K] = d_Weight[K, Y, X, C] )\n"
+              << "arg4: verification (0: no, 1: yes)\n"
+              << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
+              << "arg6: print tensor value (0: no; 1: yes)\n"
+              << "arg7: time kernel (0: no, 1: yes)\n"
+              << "arg8: N spatial dimensions\n"
+              << "Following arguments (depending on number of spatial dims):\n"
+              << " N, K, C,\n"
+              << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
+              << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
+              << " <strides>, (ie Sy, Sx for 2D)\n"
+              << " <dilations>, (ie Dy, Dx for 2D)\n"
+              << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
+              << " <right padding>, (ie RightPy, RightPx for 2D)\n"
+              << " SplitK\n"
+              << std::endl;
+    // clang-format-on
+}
+
+ck::tensor_operation::device::ConvParams
+parse_conv_params(int num_dim_spatial, int arg_idx, char* const argv[])
+{
+    const ck::index_t N = std::stoi(argv[arg_idx++]);
+    const ck::index_t K = std::stoi(argv[arg_idx++]);
+    const ck::index_t C = std::stoi(argv[arg_idx++]);
+
+    std::vector<ck::index_t> filter_spatial_lengths(num_dim_spatial);
+    std::vector<ck::index_t> input_spatial_lengths(num_dim_spatial);
+    std::vector<ck::index_t> conv_filter_strides(num_dim_spatial);
+    std::vector<ck::index_t> conv_filter_dilations(num_dim_spatial);
+    std::vector<ck::index_t> input_left_pads(num_dim_spatial);
+    std::vector<ck::index_t> input_right_pads(num_dim_spatial);
+
+    for(int i = 0; i < num_dim_spatial; ++i) { filter_spatial_lengths[i] = std::stoi(argv[arg_idx++]); }
+    for(int i = 0; i < num_dim_spatial; ++i) { input_spatial_lengths[i] = std::stoi(argv[arg_idx++]); }
+    for(int i = 0; i < num_dim_spatial; ++i) { conv_filter_strides[i] = std::stoi(argv[arg_idx++]); }
+    for(int i = 0; i < num_dim_spatial; ++i) { conv_filter_dilations[i] = std::stoi(argv[arg_idx++]); }
+    for(int i = 0; i < num_dim_spatial; ++i) { input_left_pads[i] = std::stoi(argv[arg_idx++]); }
+    for(int i = 0; i < num_dim_spatial; ++i) { input_right_pads[i] = std::stoi(argv[arg_idx++]); }
+
+    return ck::tensor_operation::device::ConvParams{num_dim_spatial,
+                                                    N, K, C,
+                                                    filter_spatial_lengths,
+                                                    input_spatial_lengths,
+                                                    conv_filter_strides,
+                                                    conv_filter_dilations,
+                                                    input_left_pads,
+                                                    input_right_pads};
+}
+
+} // namespace

 int profile_conv_bwd_weight(int argc, char* argv[])
 {
-    if(argc != 26)
+    // 8 for control, 1 for num_dim_spatial
+    if(argc < 9)
     {
-        printf("arg1: tensor operation (conv_fwd: ForwardConvolution)\n");
-        printf("arg2: data type (0: fp32; 1: fp16)\n");
-        printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
-        printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
-        printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
-        printf("arg6: verification (0: no; 1: yes)\n");
-        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-        printf("arg8: print tensor value (0: no; 1: yes)\n");
-        printf("arg9: run kernel # of times (>1)\n");
-        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx\n");
-        printf("arg25: split k (>=1)\n");
-        exit(1);
+        print_helper_msg();
+        return 1;
     }

     const auto data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
-    const auto in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
-    const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
-    const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
-    const bool do_verification = std::stoi(argv[6]);
-    const int init_method      = std::stoi(argv[7]);
-    const bool do_log          = std::stoi(argv[8]);
-    const bool time_kernel     = std::stoi(argv[9]);
-
-    const ck::index_t N  = std::stoi(argv[10]);
-    const ck::index_t K  = std::stoi(argv[11]);
-    const ck::index_t C  = std::stoi(argv[12]);
-    const ck::index_t Y  = std::stoi(argv[13]);
-    const ck::index_t X  = std::stoi(argv[14]);
-    const ck::index_t Hi = std::stoi(argv[15]);
-    const ck::index_t Wi = std::stoi(argv[16]);
-    const ck::index_t conv_stride_h   = std::stoi(argv[17]);
-    const ck::index_t conv_stride_w   = std::stoi(argv[18]);
-    const ck::index_t conv_dilation_h = std::stoi(argv[19]);
-    const ck::index_t conv_dilation_w = std::stoi(argv[20]);
-    const ck::index_t in_left_pad_h   = std::stoi(argv[21]);
-    const ck::index_t in_left_pad_w   = std::stoi(argv[22]);
-    const ck::index_t in_right_pad_h  = std::stoi(argv[23]);
-    const ck::index_t in_right_pad_w  = std::stoi(argv[24]);
-    ck::index_t split_k = std::stoi(argv[25]);
-    split_k = std::max(1, split_k);
-
-    const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
-    const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;
-    const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
-    const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
-
-    if(data_type == ConvDataType::F32_F32_F32 && in_layout == ConvInputLayout::NHWC &&
-       wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
-    {
-        ck::profiler::profile_conv_bwd_weight_impl<2,
-                                                   float, float, float,
-                                                   ck::tensor_layout::convolution::NHWC,
-                                                   ck::tensor_layout::convolution::KYXC,
-                                                   ck::tensor_layout::convolution::NHWK>(
-            do_verification, init_method, do_log, time_kernel, N, K, C,
-            std::vector<ck::index_t>{Hi, Wi},
-            std::vector<ck::index_t>{Y, X},
-            std::vector<ck::index_t>{Ho, Wo},
-            std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
-            std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
-            std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
-            std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w},
-            split_k);
-    }
-    else if(data_type == ConvDataType::F16_F16_F16 && in_layout == ConvInputLayout::NHWC &&
-            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
-    {
-        ck::profiler::profile_conv_bwd_weight_impl<2,
-                                                   ck::half_t, ck::half_t, ck::half_t,
-                                                   ck::tensor_layout::convolution::NHWC,
-                                                   ck::tensor_layout::convolution::KYXC,
-                                                   ck::tensor_layout::convolution::NHWK>(
-            do_verification, init_method, do_log, time_kernel, N, K, C,
-            std::vector<ck::index_t>{Hi, Wi},
-            std::vector<ck::index_t>{Y, X},
-            std::vector<ck::index_t>{Ho, Wo},
-            std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
-            std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
-            std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
-            std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w},
-            split_k);
-    }
-    else
-    {
-        throw std::runtime_error("wrong! this Conv data_type & layout is not implemented");
-    }
-
-    return 0;
+    const auto layout    = static_cast<ConvLayout>(std::stoi(argv[3]));
+    const bool do_verification = std::stoi(argv[4]);
+    const int init_method      = std::stoi(argv[5]);
+    const bool do_log          = std::stoi(argv[6]);
+    const bool time_kernel     = std::stoi(argv[7]);
+    const int num_dim_spatial  = std::stoi(argv[8]);
+
+    // 8 for control, 1 for num_dim_spatial, 3 for N/K/C, and 6 * num_dim_spatial, 1 for split-K
+    if(argc != 8 + 4 + 6 * num_dim_spatial + 1)
+    {
+        print_helper_msg();
+        return 1;
+    }
+
+    const auto params = parse_conv_params(num_dim_spatial, 9, argv);
+
+    ck::index_t split_k = std::stoi(argv[8 + 4 + 6 * num_dim_spatial]);
+    split_k = std::max(1, split_k);
+
+    using F32  = float;
+    using F16  = ck::half_t;
+    using BF16 = ck::bhalf_t;
+
+    using NWC   = ck::tensor_layout::convolution::NWC;
+    using NHWC  = ck::tensor_layout::convolution::NHWC;
+    using NDHWC = ck::tensor_layout::convolution::NDHWC;
+
+    using KXC   = ck::tensor_layout::convolution::KXC;
+    using KYXC  = ck::tensor_layout::convolution::KYXC;
+    using KZYXC = ck::tensor_layout::convolution::KZYXC;
+
+    using NWK   = ck::tensor_layout::convolution::NWK;
+    using NHWK  = ck::tensor_layout::convolution::NHWK;
+    using NDHWK = ck::tensor_layout::convolution::NDHWK;
+
+    constexpr auto I1 = ck::Number<1>{};
+    constexpr auto I2 = ck::Number<2>{};
+    constexpr auto I3 = ck::Number<3>{};
+
+    auto profile = [&](auto num_dim_spatial_tmp,
+                       auto in_layout,
+                       auto wei_layout,
+                       auto out_layout,
+                       auto in_type,
+                       auto wei_type,
+                       auto out_type) {
+        constexpr ck::index_t NDimSpatial = num_dim_spatial_tmp.value;
+
+        using InLayout  = decltype(in_layout);
+        using WeiLayout = decltype(wei_layout);
+        using OutLayout = decltype(out_layout);
+
+        using InDataType  = decltype(in_type);
+        using WeiDataType = decltype(wei_type);
+        using OutDataType = decltype(out_type);
+
+        bool pass = ck::profiler::profile_conv_bwd_weight_impl<NDimSpatial,
+                                                               InLayout, WeiLayout, OutLayout,
+                                                               InDataType, WeiDataType, OutDataType>(
+            do_verification, init_method, do_log, time_kernel, params, split_k);
+
+        return pass ? 0 : 1;
+    };
+
+    if(num_dim_spatial == 1 && layout == ConvLayout::NHWC_KYXC_NHWK)
+    {
+        if(data_type == ConvDataType::F32_F32_F32)
+        {
+            return profile(I1, NWC{}, KXC{}, NWK{}, F32{}, F32{}, F32{});
+        }
+        else if(data_type == ConvDataType::F16_F16_F16)
+        {
+            return profile(I1, NWC{}, KXC{}, NWK{}, F16{}, F16{}, F16{});
+        }
+        else if(data_type == ConvDataType::BF16_BF16_BF16)
+        {
+            return profile(I1, NWC{}, KXC{}, NWK{}, BF16{}, BF16{}, BF16{});
+        }
+    }
+    else if(num_dim_spatial == 2 && layout == ConvLayout::NHWC_KYXC_NHWK)
+    {
+        if(data_type == ConvDataType::F32_F32_F32)
+        {
+            return profile(I2, NHWC{}, KYXC{}, NHWK{}, F32{}, F32{}, F32{});
+        }
+        else if(data_type == ConvDataType::F16_F16_F16)
+        {
+            return profile(I2, NHWC{}, KYXC{}, NHWK{}, F16{}, F16{}, F16{});
+        }
+        else if(data_type == ConvDataType::BF16_BF16_BF16)
+        {
+            return profile(I2, NHWC{}, KYXC{}, NHWK{}, BF16{}, BF16{}, BF16{});
+        }
+    }
+    else if(num_dim_spatial == 3 && layout == ConvLayout::NHWC_KYXC_NHWK)
+    {
+        if(data_type == ConvDataType::F32_F32_F32)
+        {
+            return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, F32{}, F32{}, F32{});
+        }
+        else if(data_type == ConvDataType::F16_F16_F16)
+        {
+            return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, F16{}, F16{}, F16{});
+        }
+        else if(data_type == ConvDataType::BF16_BF16_BF16)
+        {
+            return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, BF16{}, BF16{}, BF16{});
+        }
+    }
+
+    std::cout << "this data_type & layout is not implemented" << std::endl;
+    return 1;
 }
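For reference, the updated entry point takes the generic argument order described in print_helper_msg(): control flags, then the number of spatial dimensions, then N/K/C, the per-dimension lengths, and a trailing SplitK. The sketch below is a hypothetical driver, not part of this commit, that builds such an argument vector for a 2-D NHWC/KYXC/NHWK fp16 case; all problem sizes are made-up values chosen only to illustrate the layout.

    // Hypothetical usage sketch (not part of this commit): exercises the new
    // argument layout of profile_conv_bwd_weight() for num_dim_spatial = 2.
    // argc must equal 8 + 4 + 6 * num_dim_spatial + 1 = 25 for the 2-D case.
    #include <array>

    int profile_conv_bwd_weight(int, char*[]); // declared in profiler/src/profiler.cpp

    int main()
    {
        std::array<const char*, 25> args = {
            "ckProfiler", "conv_bwd_weight", // argv[0]/argv[1] as the profiler binary would see them
            "1",                  // arg2: data type (1: fp16)
            "1",                  // arg3: layout (1: NHWC/KYXC/NHWK)
            "1", "1", "0", "1",   // arg4-7: verification, init method, log, time kernel
            "2",                  // arg8: number of spatial dimensions
            "128", "256", "192",  // N, K, C (made-up sizes)
            "3", "3",             // filter Y, X
            "71", "71",           // input Hi, Wi
            "2", "2",             // strides Sy, Sx
            "1", "1",             // dilations Dy, Dx
            "1", "1",             // left pads
            "1", "1",             // right pads
            "4"};                 // SplitK
        // The literals are only read (via std::stoi), so the const_cast is benign here.
        return profile_conv_bwd_weight(static_cast<int>(args.size()),
                                       const_cast<char**>(args.data()));
    }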
profiler/src/profile_conv_fwd.cpp

@@ -8,6 +8,8 @@
 #include "profiler/include/profile_conv_fwd_impl.hpp"

+namespace {
+
 enum struct ConvLayout
 {
     NCHW_KYXC_NKHW, // 0

@@ -25,26 +27,26 @@ enum struct ConvDataType
 static void print_helper_msg()
 {
     // clang-format-off
-    std::cout << "arg1: tensor operation (conv_fwd: ForwardConvolution)\n"
-              << "arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n"
-              << "arg3: tensor layout (0: Input[N, C, Hi, Wi] * Weight[K, C, Y, X] = Output[N, K, Ho, Wo]\n"
-              << "                     1: Input[N, Hi, Wi, C] * Weight[K, Y, X, C] = Output[N, Ho, Wo, K])\n"
+    std::cout << "arg1: tensor operation (conv_fwd: Convolution Forward)\n"
+              << "arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n"
+              << "arg3: tensor layout (0: Input[N, C, Hi, Wi], Weight[K, C, Y, X], Output[N, K, Ho, Wo]\n"
+              << "                     1: Input[N, Hi, Wi, C], Weight[K, Y, X, C], Output[N, Ho, Wo, K])\n"
               << "arg4: verification (0: no, 1: yes)\n"
               << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
               << "arg6: print tensor value (0: no; 1: yes)\n"
               << "arg7: time kernel (0: no, 1: yes)\n"
               << "arg8: N spatial dimensions\n"
               << "Following arguments (depending on number of spatial dims):\n"
               << " N, K, C,\n"
               << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
               << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
               << " <strides>, (ie Sy, Sx for 2D)\n"
               << " <dilations>, (ie Dy, Dx for 2D)\n"
               << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
               << " <right padding>, (ie RightPy, RightPx for 2D)\n"
               << std::endl;
     // clang-format-on
 }

@@ -104,13 +106,15 @@ parse_conv_params(int num_dim_spatial, int arg_idx, char* const argv[])
                                                     input_right_pads};
 }

+} // namespace
+
 int profile_conv_fwd(int argc, char* argv[])
 {
     // 8 for control, 1 for num_dim_spatial
     if(argc < 9)
     {
         print_helper_msg();
-        exit(1);
+        return 1;
     }

     const auto data_type = static_cast<ConvDataType>(std::stoi(argv[2]));

@@ -125,7 +129,7 @@ int profile_conv_fwd(int argc, char* argv[])
     if(argc != 8 + 4 + 6 * num_dim_spatial)
     {
         print_helper_msg();
-        exit(1);
+        return 1;
     }

     const auto params = parse_conv_params(num_dim_spatial, 9, argv);

@@ -152,29 +156,29 @@ int profile_conv_fwd(int argc, char* argv[])
     constexpr auto I3 = ck::Number<3>{};

     auto profile = [&](auto num_dim_spatial_tmp,
-                       auto in_type,
-                       auto wei_type,
-                       auto out_type,
-                       auto in_layout,
-                       auto wei_layout,
-                       auto out_layout) {
-        constexpr ck::index_t NumDimSpatial = num_dim_spatial_tmp.value;
-
-        using InDataType  = decltype(in_type);
-        using WeiDataType = decltype(wei_type);
-        using OutDataType = decltype(out_type);
-
-        using InLayout  = decltype(in_layout);
-        using WeiLayout = decltype(wei_layout);
-        using OutLayout = decltype(out_layout);
-
-        bool pass = ck::profiler::profile_conv_fwd_impl<NumDimSpatial,
-                                                        InDataType, WeiDataType, OutDataType,
-                                                        InLayout, WeiLayout, OutLayout>(
-            do_verification, init_method, do_log, time_kernel, params);
-
-        return pass ? 0 : 1;
+                       auto in_layout,
+                       auto wei_layout,
+                       auto out_layout,
+                       auto in_type,
+                       auto wei_type,
+                       auto out_type) {
+        constexpr ck::index_t NDimSpatial = num_dim_spatial_tmp.value;
+
+        using InLayout  = decltype(in_layout);
+        using WeiLayout = decltype(wei_layout);
+        using OutLayout = decltype(out_layout);
+
+        using InDataType  = decltype(in_type);
+        using WeiDataType = decltype(wei_type);
+        using OutDataType = decltype(out_type);
+
+        bool pass = ck::profiler::profile_conv_fwd_impl<NDimSpatial,
+                                                        InLayout, WeiLayout, OutLayout,
+                                                        InDataType, WeiDataType, OutDataType>(
+            do_verification, init_method, do_log, time_kernel, params);
+
+        return pass ? 0 : 1;
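Both profilers turn the runtime num_dim_spatial into a compile-time template argument by passing ck::Number<1/2/3> objects (I1/I2/I3) into a generic lambda and reading .value inside it. A minimal stand-alone sketch of the same idiom follows; std::integral_constant stands in for ck::Number purely for illustration, it is not the composable_kernel type.

    // Minimal sketch of the dispatch idiom used by the profile lambdas: a generic
    // lambda receives a compile-time integer wrapper, so .value can be used where
    // a constant expression is required (here, an array size).
    #include <array>
    #include <cstdio>
    #include <type_traits>

    int main()
    {
        auto profile = [](auto ndim_tmp) {
            constexpr int NDimSpatial = ndim_tmp.value;  // compile-time constant
            std::array<int, NDimSpatial> lengths{};      // usable as a template argument
            std::printf("profiling a %d-D convolution, %zu spatial lengths\n",
                        NDimSpatial, lengths.size());
            return 0;
        };

        constexpr auto I2 = std::integral_constant<int, 2>{};
        constexpr auto I3 = std::integral_constant<int, 3>{};

        int num_dim_spatial = 3; // runtime value, e.g. parsed from argv[8]
        return (num_dim_spatial == 2) ? profile(I2) : profile(I3);
    }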
profiler/src/profile_convnd_bwd_data.cpp  (deleted, 100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>

#include "profiler/include/profile_convnd_bwd_data_impl.hpp"

namespace {

enum struct ConvDataType
{
    F32_F32_F32,    // 0
    F16_F16_F16,    // 1
    BF16_BF16_BF16, // 2
    INT8_INT8_INT8, // 3
};

enum struct ConvInputLayout
{
    NCHW, // 0
    NHWC, // 1
};

enum struct ConvWeightLayout
{
    KCYX, // 0
    KYXC, // 1
};

enum struct ConvOutputLayout
{
    NKHW, // 0
    NHWK, // 1
};

ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[], int arg_idx)
{
    // (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
    ck::utils::conv::ConvParams params;

    params.num_dim_spatial_ = num_dim_spatial;
    params.N_               = std::stoi(argv[arg_idx++]);
    params.K_               = std::stoi(argv[arg_idx++]);
    params.C_               = std::stoi(argv[arg_idx++]);

    params.filter_spatial_lengths_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.filter_spatial_lengths_[i] = std::stoi(argv[arg_idx++]); }
    params.input_spatial_lengths_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.input_spatial_lengths_[i] = std::stoi(argv[arg_idx++]); }
    params.conv_filter_strides_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.conv_filter_strides_[i] = std::stoi(argv[arg_idx++]); }
    params.conv_filter_dilations_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.conv_filter_dilations_[i] = std::stoi(argv[arg_idx++]); }
    params.input_left_pads_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.input_left_pads_[i] = std::stoi(argv[arg_idx++]); }
    params.input_right_pads_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.input_right_pads_[i] = std::stoi(argv[arg_idx++]); }

    return params;
}

} // namespace

int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
{
    const int preParams = 10;
    int conv_args       = 3 + num_dim_spatial * 6;
    int cmdline_nargs   = conv_args + preParams;
    if(cmdline_nargs != argc)
    {
        printf("arg1: tensor operation (conv[1|2|3]d_bwd_data: BackwardConvolution)\n");
        printf("arg2: data type (0: fp32; 1: fp16)\n");
        printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
        printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
        printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
        printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx\n");
        return 1;
    }

    const auto data_type  = static_cast<ConvDataType>(std::stoi(argv[2]));
    const auto in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
    const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
    const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
    const bool do_verification = std::stoi(argv[6]);
    const int init_method      = std::stoi(argv[7]);
    const bool do_log          = std::stoi(argv[8]);
    const bool time_kernel     = std::stoi(argv[9]);

    ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);

    auto Run = [&](auto input_type, auto wei_type, auto out_type, auto acc_type) {
        using InDataType  = decltype(input_type);
        using WeiDataType = decltype(wei_type);
        using OutDataType = decltype(out_type);
        using AccDataType = decltype(acc_type);

        switch(num_dim_spatial)
        {
        case 1:
            ck::profiler::profile_convnd_bwd_data_impl<1,
                                                       InDataType, WeiDataType, OutDataType, AccDataType,
                                                       ck::tensor_layout::convolution::NWC,
                                                       ck::tensor_layout::convolution::KXC,
                                                       ck::tensor_layout::convolution::NWK>(
                do_verification, init_method, do_log, time_kernel,
                params.N_, params.K_, params.C_,
                params.input_spatial_lengths_,
                params.filter_spatial_lengths_,
                params.GetOutputSpatialLengths(),
                params.conv_filter_strides_,
                params.conv_filter_dilations_,
                params.input_left_pads_,
                params.input_right_pads_);
            break;
        case 2:
            ck::profiler::profile_convnd_bwd_data_impl<2,
                                                       InDataType, WeiDataType, OutDataType, AccDataType,
                                                       ck::tensor_layout::convolution::NHWC,
                                                       ck::tensor_layout::convolution::KYXC,
                                                       ck::tensor_layout::convolution::NHWK>(
                do_verification, init_method, do_log, time_kernel,
                params.N_, params.K_, params.C_,
                params.input_spatial_lengths_,
                params.filter_spatial_lengths_,
                params.GetOutputSpatialLengths(),
                params.conv_filter_strides_,
                params.conv_filter_dilations_,
                params.input_left_pads_,
                params.input_right_pads_);
            break;
        case 3:
            ck::profiler::profile_convnd_bwd_data_impl<3,
                                                       InDataType, WeiDataType, OutDataType, AccDataType,
                                                       ck::tensor_layout::convolution::NDHWC,
                                                       ck::tensor_layout::convolution::KZYXC,
                                                       ck::tensor_layout::convolution::NDHWK>(
                do_verification, init_method, do_log, time_kernel,
                params.N_, params.K_, params.C_,
                params.input_spatial_lengths_,
                params.filter_spatial_lengths_,
                params.GetOutputSpatialLengths(),
                params.conv_filter_strides_,
                params.conv_filter_dilations_,
                params.input_left_pads_,
                params.input_right_pads_);
            break;
        default: break;
        }
    };

    if(data_type == ConvDataType::F32_F32_F32 && in_layout == ConvInputLayout::NHWC &&
       wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(float{}, float{}, float{}, float{});
    }
    else if(data_type == ConvDataType::F16_F16_F16 && in_layout == ConvInputLayout::NHWC &&
            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(ck::half_t{}, ck::half_t{}, ck::half_t{}, float{});
    }
    else if(data_type == ConvDataType::BF16_BF16_BF16 && in_layout == ConvInputLayout::NHWC &&
            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(ck::bhalf_t{}, ck::bhalf_t{}, ck::bhalf_t{}, float{});
    }
    else if(data_type == ConvDataType::INT8_INT8_INT8 && in_layout == ConvInputLayout::NHWC &&
            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(int8_t{}, int8_t{}, int8_t{}, int32_t{});
    }
    else
    {
        std::cout << "wrong! this Conv data_type & layout is not implemented" << std::endl;
        return 1;
    }

    return 0;
}
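The deleted file obtains the output sizes from params.GetOutputSpatialLengths(), the same quantity the old profile_conv_bwd_weight.cpp still computed inline (YEff/XEff/Ho/Wo above). The sketch below restates that arithmetic as a stand-alone helper for reference; it mirrors the removed inline formula and is not a copy of ConvParams::GetOutputSpatialLengths().

    // Stand-alone sketch of the output-size arithmetic the profilers rely on
    // (same formula as the removed YEff/XEff/Ho/Wo lines).
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> output_spatial_lengths(const std::vector<int64_t>& in,       // Hi, Wi, ...
                                                const std::vector<int64_t>& filter,   // Y, X, ...
                                                const std::vector<int64_t>& stride,
                                                const std::vector<int64_t>& dilation,
                                                const std::vector<int64_t>& left_pad,
                                                const std::vector<int64_t>& right_pad)
    {
        std::vector<int64_t> out(in.size());
        for(std::size_t i = 0; i < in.size(); ++i)
        {
            const int64_t eff = (filter[i] - 1) * dilation[i] + 1; // dilated filter extent
            out[i] = (in[i] + left_pad[i] + right_pad[i] - eff) / stride[i] + 1;
        }
        return out;
    }
    // Example: Hi = 71, Y = 3, stride = 2, dilation = 1, pads = 1 gives Ho = (71 + 1 + 1 - 3) / 2 + 1 = 36.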
profiler/src/profile_convnd_bwd_weight.cpp  (deleted, 100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>

#include "profiler/include/profile_convnd_bwd_weight_impl.hpp"

namespace {

enum struct ConvDataType
{
    F32_F32_F32,    // 0
    F16_F16_F16,    // 1
    BF16_BF16_BF16, // 2
};

enum struct ConvInputLayout
{
    NCHW, // 0
    NHWC, // 1
};

enum struct ConvWeightLayout
{
    KCYX, // 0
    KYXC, // 1
};

enum struct ConvOutputLayout
{
    NKHW, // 0
    NHWK, // 1
};

ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[], int arg_idx)
{
    // (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
    ck::utils::conv::ConvParams params;

    params.num_dim_spatial_ = num_dim_spatial;
    params.N_               = std::stoi(argv[arg_idx++]);
    params.K_               = std::stoi(argv[arg_idx++]);
    params.C_               = std::stoi(argv[arg_idx++]);

    params.filter_spatial_lengths_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.filter_spatial_lengths_[i] = std::stoi(argv[arg_idx++]); }
    params.input_spatial_lengths_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.input_spatial_lengths_[i] = std::stoi(argv[arg_idx++]); }
    params.conv_filter_strides_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.conv_filter_strides_[i] = std::stoi(argv[arg_idx++]); }
    params.conv_filter_dilations_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.conv_filter_dilations_[i] = std::stoi(argv[arg_idx++]); }
    params.input_left_pads_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.input_left_pads_[i] = std::stoi(argv[arg_idx++]); }
    params.input_right_pads_.resize(num_dim_spatial);
    for(int i = 0; i < num_dim_spatial; ++i) { params.input_right_pads_[i] = std::stoi(argv[arg_idx++]); }

    return params;
}

} // namespace

int profile_convnd_bwd_weight(int argc, char* argv[], int num_dim_spatial)
{
    const int preParams = 11;
    int conv_args       = 3 + num_dim_spatial * 6;
    int cmdline_nargs   = conv_args + preParams;
    if(cmdline_nargs != argc)
    {
        printf("arg1: tensor operation (convnd[1|2|3]d_bwd_weight: BackwardConvolution)\n");
        printf("arg2: data type (0: fp32; 1: fp16, 2: bf16)\n");
        printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
        printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
        printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
        printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10: splitk\n");
        printf("arg11 to 25: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx\n");
        return 1;
    }

    const auto data_type  = static_cast<ConvDataType>(std::stoi(argv[2]));
    const auto in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
    const auto wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
    const auto out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
    const bool do_verification = std::stoi(argv[6]);
    const int init_method      = std::stoi(argv[7]);
    const bool do_log          = std::stoi(argv[8]);
    const bool time_kernel     = std::stoi(argv[9]);

    ck::index_t split_k = std::stoi(argv[10]);
    split_k = std::max(1, split_k);

    ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);

    auto Run = [&](auto input_type, auto wei_type, auto out_type) {
        using InDataType  = decltype(input_type);
        using WeiDataType = decltype(wei_type);
        using OutDataType = decltype(out_type);

        switch(num_dim_spatial)
        {
        case 1:
            ck::profiler::profile_convnd_bwd_weight_impl<1,
                                                         InDataType, WeiDataType, OutDataType,
                                                         ck::tensor_layout::convolution::NWC,
                                                         ck::tensor_layout::convolution::KXC,
                                                         ck::tensor_layout::convolution::NWK>(
                do_verification, init_method, do_log, time_kernel,
                params.N_, params.K_, params.C_,
                params.input_spatial_lengths_,
                params.filter_spatial_lengths_,
                params.GetOutputSpatialLengths(),
                params.conv_filter_strides_,
                params.conv_filter_dilations_,
                params.input_left_pads_,
                params.input_right_pads_,
                split_k);
            break;
        case 2:
            ck::profiler::profile_convnd_bwd_weight_impl<2,
                                                         InDataType, WeiDataType, OutDataType,
                                                         ck::tensor_layout::convolution::NHWC,
                                                         ck::tensor_layout::convolution::KYXC,
                                                         ck::tensor_layout::convolution::NHWK>(
                do_verification, init_method, do_log, time_kernel,
                params.N_, params.K_, params.C_,
                params.input_spatial_lengths_,
                params.filter_spatial_lengths_,
                params.GetOutputSpatialLengths(),
                params.conv_filter_strides_,
                params.conv_filter_dilations_,
                params.input_left_pads_,
                params.input_right_pads_,
                split_k);
            break;
        case 3:
            ck::profiler::profile_convnd_bwd_weight_impl<3,
                                                         InDataType, WeiDataType, OutDataType,
                                                         ck::tensor_layout::convolution::NDHWC,
                                                         ck::tensor_layout::convolution::KZYXC,
                                                         ck::tensor_layout::convolution::NDHWK>(
                do_verification, init_method, do_log, time_kernel,
                params.N_, params.K_, params.C_,
                params.input_spatial_lengths_,
                params.filter_spatial_lengths_,
                params.GetOutputSpatialLengths(),
                params.conv_filter_strides_,
                params.conv_filter_dilations_,
                params.input_left_pads_,
                params.input_right_pads_,
                split_k);
            break;
        default: break;
        }
    };

    if(data_type == ConvDataType::F32_F32_F32 && in_layout == ConvInputLayout::NHWC &&
       wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(float{}, float{}, float{});
    }
    else if(data_type == ConvDataType::F16_F16_F16 && in_layout == ConvInputLayout::NHWC &&
            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(ck::half_t{}, ck::half_t{}, ck::half_t{});
    }
    else if(data_type == ConvDataType::BF16_BF16_BF16 && in_layout == ConvInputLayout::NHWC &&
            wei_layout == ConvWeightLayout::KYXC && out_layout == ConvOutputLayout::NHWK)
    {
        Run(ck::bhalf_t{}, ck::bhalf_t{}, ck::bhalf_t{});
    }
    else
    {
        std::cout << "wrong! this Conv data_type & layout is not implemented" << std::endl;
        return 1;
    }

    return 0;
}
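Both backward-weight profilers clamp the trailing SplitK argument with std::max(1, split_k). Conceptually, split-K divides the reduction dimension of the backward-weight GEMM among split_k partial computations whose results are accumulated afterwards. The sketch below is a generic illustration of that partitioning only; it is not taken from the composable_kernel kernels.

    // Generic split-K illustration (not CK code): partition a reduction of
    // length K into split_k chunks and accumulate the partial sums, which is
    // the high-level idea behind the SplitK argument of the bwd-weight profiler.
    #include <algorithm>
    #include <vector>

    float reduce_split_k(const std::vector<float>& x, int split_k)
    {
        split_k = std::max(1, split_k);                 // same clamp as the profiler
        const int K     = static_cast<int>(x.size());
        const int chunk = (K + split_k - 1) / split_k;  // ceil(K / split_k)

        std::vector<float> partial(split_k, 0.0f);      // one accumulator per split
        for(int s = 0; s < split_k; ++s)                // each split could run in parallel
        {
            const int begin = s * chunk;
            const int end   = std::min(K, begin + chunk);
            for(int k = begin; k < end; ++k)
                partial[s] += x[k];
        }

        float sum = 0.0f;                               // final reduction over the partial results
        for(float p : partial)
            sum += p;
        return sum;
    }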
profiler/src/profiler.cpp

@@ -15,9 +15,8 @@ int profile_grouped_gemm(int, char*[]);
 int profile_conv_fwd(int, char* []);
 int profile_conv_fwd_bias_relu(int, char* []);
 int profile_conv_fwd_bias_relu_add(int, char* []);
-int profile_convnd_bwd_data(int, char* [], int);
+int profile_conv_bwd_data(int, char* []);
 int profile_conv_bwd_weight(int, char* []);
-int profile_convnd_bwd_weight(int, char* [], int);
 int profile_normalization(int, char* []);
 int profile_reduce(int, char* []);

@@ -33,13 +32,11 @@ static void print_helper_message()
        " batched_gemm: Batched GEMM\n"
        " batched_gemm_reduce: Batched GEMM+Reduce\n"
        " grouped_gemm: Grouped GEMM\n"
-       " conv_fwd: Forward Convolution\n"
+       " conv_fwd: Convolution Forward\n"
        " conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
        " conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
-       " conv1d_bwd_data: BackwardConvolution data 1 dim\n"
-       " conv2d_bwd_data: BackwardConvolution data 2 dim\n"
-       " conv3d_bwd_data: BackwardConvolution data 3 dim\n"
-       " conv2d_bwd_weight: Backward Weight Convolution 2d\n"
+       " conv_bwd_data: Convolution Backward Data\n"
+       " conv_bwd_weight: Convolution Backward Weight\n"
        " reduce: Reduce\n");
     // clang-format on
 }

@@ -101,34 +98,14 @@ int main(int argc, char* argv[])
     {
         return profile_conv_fwd_bias_relu_add(argc, argv);
     }
-    else if(strcmp(argv[1], "conv1d_bwd_data") == 0)
+    else if(strcmp(argv[1], "conv_bwd_data") == 0)
     {
-        return profile_convnd_bwd_data(argc, argv, 1);
+        return profile_conv_bwd_data(argc, argv);
     }
-    else if(strcmp(argv[1], "conv2d_bwd_data") == 0)
-    {
-        return profile_convnd_bwd_data(argc, argv, 2);
-    }
-    else if(strcmp(argv[1], "conv3d_bwd_data") == 0)
-    {
-        return profile_convnd_bwd_data(argc, argv, 3);
-    }
-    else if(strcmp(argv[1], "conv2d_bwd_weight") == 0)
+    else if(strcmp(argv[1], "conv_bwd_weight") == 0)
     {
         return profile_conv_bwd_weight(argc, argv);
     }
-    else if(strcmp(argv[1], "convnd1d_bwd_weight") == 0)
-    {
-        return profile_convnd_bwd_weight(argc, argv, 1);
-    }
-    else if(strcmp(argv[1], "convnd2d_bwd_weight") == 0)
-    {
-        return profile_convnd_bwd_weight(argc, argv, 2);
-    }
-    else if(strcmp(argv[1], "convnd3d_bwd_weight") == 0)
-    {
-        return profile_convnd_bwd_weight(argc, argv, 3);
-    }
     else if(strcmp(argv[1], "reduce") == 0)
     {
         return profile_reduce(argc, argv);