Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
e00a943e
Commit
e00a943e
authored
May 17, 2022
by
myamlak
Browse files
Merge remote-tracking branch 'origin/develop' into myamlak/cgemm
parents
ffe12e2e
9f71ff48
Changes
162
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
188 additions
and
199 deletions
+188
-199
profiler/include/profile_gemm_bias_relu_add_impl.hpp
profiler/include/profile_gemm_bias_relu_add_impl.hpp
+3
-2
profiler/include/profile_gemm_bias_relu_impl.hpp
profiler/include/profile_gemm_bias_relu_impl.hpp
+3
-2
profiler/include/profile_gemm_impl.hpp
profiler/include/profile_gemm_impl.hpp
+3
-2
profiler/include/profile_gemm_reduce_impl.hpp
profiler/include/profile_gemm_reduce_impl.hpp
+7
-25
profiler/include/profile_grouped_gemm_impl.hpp
profiler/include/profile_grouped_gemm_impl.hpp
+3
-2
profiler/include/profile_reduce_impl.hpp
profiler/include/profile_reduce_impl.hpp
+9
-6
profiler/src/profile_batched_gemm.cpp
profiler/src/profile_batched_gemm.cpp
+19
-19
profiler/src/profile_batched_gemm_reduce.cpp
profiler/src/profile_batched_gemm_reduce.cpp
+7
-7
profiler/src/profile_conv_bwd_data.cpp
profiler/src/profile_conv_bwd_data.cpp
+6
-6
profiler/src/profile_conv_bwd_weight.cpp
profiler/src/profile_conv_bwd_weight.cpp
+3
-3
profiler/src/profile_conv_fwd_bias_relu.cpp
profiler/src/profile_conv_fwd_bias_relu.cpp
+3
-3
profiler/src/profile_conv_fwd_bias_relu_add.cpp
profiler/src/profile_conv_fwd_bias_relu_add.cpp
+3
-3
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp
+3
-3
profiler/src/profile_convnd_bwd_data.cpp
profiler/src/profile_convnd_bwd_data.cpp
+48
-48
profiler/src/profile_convnd_fwd.cpp
profiler/src/profile_convnd_fwd.cpp
+17
-17
profiler/src/profile_gemm.cpp
profiler/src/profile_gemm.cpp
+19
-19
profiler/src/profile_gemm_bias_2d.cpp
profiler/src/profile_gemm_bias_2d.cpp
+11
-11
profiler/src/profile_gemm_bias_relu.cpp
profiler/src/profile_gemm_bias_relu.cpp
+7
-7
profiler/src/profile_gemm_bias_relu_add.cpp
profiler/src/profile_gemm_bias_relu_add.cpp
+7
-7
profiler/src/profile_gemm_reduce.cpp
profiler/src/profile_gemm_reduce.cpp
+7
-7
No files found.
profiler/include/profile_gemm_bias_relu_add_impl.hpp
View file @
e00a943e
...
@@ -48,7 +48,7 @@ template <typename ADataType,
...
@@ -48,7 +48,7 @@ template <typename ADataType,
void
profile_gemm_bias_relu_add_impl
(
int
do_verification
,
void
profile_gemm_bias_relu_add_impl
(
int
do_verification
,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
int
M
,
int
M
,
int
N
,
int
N
,
int
K
,
int
K
,
...
@@ -232,7 +232,8 @@ void profile_gemm_bias_relu_add_impl(int do_verification,
...
@@ -232,7 +232,8 @@ void profile_gemm_bias_relu_add_impl(int do_verification,
{
{
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
...
...
profiler/include/profile_gemm_bias_relu_impl.hpp
View file @
e00a943e
...
@@ -48,7 +48,7 @@ template <typename ADataType,
...
@@ -48,7 +48,7 @@ template <typename ADataType,
void
profile_gemm_bias_relu_impl
(
int
do_verification
,
void
profile_gemm_bias_relu_impl
(
int
do_verification
,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
int
M
,
int
M
,
int
N
,
int
N
,
int
K
,
int
K
,
...
@@ -212,7 +212,8 @@ void profile_gemm_bias_relu_impl(int do_verification,
...
@@ -212,7 +212,8 @@ void profile_gemm_bias_relu_impl(int do_verification,
{
{
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
...
...
profiler/include/profile_gemm_impl.hpp
View file @
e00a943e
...
@@ -91,7 +91,7 @@ template <typename ADataType,
...
@@ -91,7 +91,7 @@ template <typename ADataType,
void
profile_gemm_impl
(
int
do_verification
,
void
profile_gemm_impl
(
int
do_verification
,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
int
M
,
int
M
,
int
N
,
int
N
,
int
K
,
int
K
,
...
@@ -416,7 +416,8 @@ void profile_gemm_impl(int do_verification,
...
@@ -416,7 +416,8 @@ void profile_gemm_impl(int do_verification,
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
...
...
profiler/include/profile_gemm_reduce_impl.hpp
View file @
e00a943e
...
@@ -52,7 +52,7 @@ template <typename ADataType,
...
@@ -52,7 +52,7 @@ template <typename ADataType,
bool
profile_gemm_reduce_impl
(
int
do_verification
,
bool
profile_gemm_reduce_impl
(
int
do_verification
,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
int
M
,
int
M
,
int
N
,
int
N
,
int
K
,
int
K
,
...
@@ -243,36 +243,18 @@ bool profile_gemm_reduce_impl(int do_verification,
...
@@ -243,36 +243,18 @@ bool profile_gemm_reduce_impl(int do_verification,
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
// warm up
// init DO, D1 to 0
invoker_ptr
->
Run
(
argument_ptr
.
get
());
d0_device_buf
.
SetZero
();
d1_device_buf
.
SetZero
();
// timing
float
ave_time
=
float
total_time
=
0
;
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
{
// init DO, D1 to 0
d0_device_buf
.
SetZero
();
d1_device_buf
.
SetZero
();
KernelTimer
timer
;
timer
.
Start
();
invoker_ptr
->
Run
(
argument_ptr
.
get
());
timer
.
End
();
total_time
+=
timer
.
GetElapsedTime
();
}
float
ave_time
=
total_time
/
nrepeat
;
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
ADataType
)
*
M
*
K
+
sizeof
(
BDataType
)
*
K
*
M
+
std
::
size_t
num_btype
=
sizeof
(
ADataType
)
*
M
*
K
+
sizeof
(
BDataType
)
*
K
*
N
+
sizeof
(
CDataType
)
*
M
*
N
+
sizeof
(
CDataType
)
*
N
;
sizeof
(
CDataType
)
*
M
*
N
+
sizeof
(
CDataType
)
*
N
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
...
...
profiler/include/profile_grouped_gemm_impl.hpp
View file @
e00a943e
...
@@ -49,7 +49,7 @@ template <typename ADataType,
...
@@ -49,7 +49,7 @@ template <typename ADataType,
void
profile_grouped_gemm_impl
(
int
do_verification
,
void
profile_grouped_gemm_impl
(
int
do_verification
,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
const
std
::
vector
<
int
>&
Ms
,
const
std
::
vector
<
int
>&
Ms
,
const
std
::
vector
<
int
>&
Ns
,
const
std
::
vector
<
int
>&
Ns
,
const
std
::
vector
<
int
>&
Ks
,
const
std
::
vector
<
int
>&
Ks
,
...
@@ -231,7 +231,8 @@ void profile_grouped_gemm_impl(int do_verification,
...
@@ -231,7 +231,8 @@ void profile_grouped_gemm_impl(int do_verification,
{
{
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
std
::
string
gemm_name
=
gemm_ptr
->
GetTypeString
();
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
flop
=
0
,
num_btype
=
0
;
std
::
size_t
flop
=
0
,
num_btype
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
...
...
profiler/include/profile_reduce_impl.hpp
View file @
e00a943e
...
@@ -157,7 +157,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -157,7 +157,7 @@ void profile_reduce_impl_impl(bool do_verification,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
bool
do_dumpout
,
bool
do_dumpout
,
int
nrepeat
,
bool
time_kernel
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
int
>&
reduceDims
,
const
std
::
vector
<
int
>&
reduceDims
,
float
alpha
,
float
alpha
,
...
@@ -430,7 +430,8 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -430,7 +430,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto
invoker_ptr
=
reduce_ptr
->
MakeInvokerPointer
();
auto
invoker_ptr
=
reduce_ptr
->
MakeInvokerPointer
();
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
num_bytes
=
std
::
size_t
num_bytes
=
invariant_total_length
*
reduce_total_length
*
sizeof
(
InDataType
)
+
invariant_total_length
*
reduce_total_length
*
sizeof
(
InDataType
)
+
...
@@ -516,7 +517,8 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -516,7 +517,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto
invoker_ptr
=
reduce_ptr
->
MakeInvokerPointer
();
auto
invoker_ptr
=
reduce_ptr
->
MakeInvokerPointer
();
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
num_bytes
=
std
::
size_t
num_bytes
=
invariant_total_length
*
reduce_total_length
*
sizeof
(
InDataType
)
+
invariant_total_length
*
reduce_total_length
*
sizeof
(
InDataType
)
+
...
@@ -554,7 +556,8 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -554,7 +556,8 @@ void profile_reduce_impl_impl(bool do_verification,
auto
invoker2_ptr
=
reduce2_ptr
->
MakeInvokerPointer
();
auto
invoker2_ptr
=
reduce2_ptr
->
MakeInvokerPointer
();
float
avg_time_2
=
invoker2_ptr
->
Run
(
argument2_ptr
.
get
(),
nrepeat
);
float
avg_time_2
=
invoker2_ptr
->
Run
(
argument2_ptr
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
});
std
::
size_t
num_bytes_2
=
std
::
size_t
num_bytes_2
=
static_cast
<
size_t
>
(
inLengths2
[
0
])
*
inLengths2
[
1
]
*
sizeof
(
AccDataType
);
static_cast
<
size_t
>
(
inLengths2
[
0
])
*
inLengths2
[
1
]
*
sizeof
(
AccDataType
);
...
@@ -625,7 +628,7 @@ void profile_reduce_impl(bool do_verification,
...
@@ -625,7 +628,7 @@ void profile_reduce_impl(bool do_verification,
int
init_method
,
int
init_method
,
bool
do_log
,
bool
do_log
,
bool
do_dumpout
,
bool
do_dumpout
,
int
nrepeat
,
bool
time_kernel
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
int
>&
reduceDims
,
const
std
::
vector
<
int
>&
reduceDims
,
ReduceTensorOp
ReduceOpId
,
ReduceTensorOp
ReduceOpId
,
...
@@ -663,7 +666,7 @@ void profile_reduce_impl(bool do_verification,
...
@@ -663,7 +666,7 @@ void profile_reduce_impl(bool do_verification,
init_method
,
init_method
,
do_log
,
do_log
,
do_dumpout
,
do_dumpout
,
nrepeat
,
time_kernel
,
inLengths
,
inLengths
,
reduceDims
,
reduceDims
,
alpha
,
alpha
,
...
...
profiler/src/profile_batched_gemm.cpp
View file @
e00a943e
...
@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[])
printf
(
" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])
\n
"
);
printf
(
" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount
\n
"
);
printf
(
"arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount
\n
"
);
exit
(
1
);
exit
(
1
);
}
}
...
@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[])
...
@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
profiler/src/profile_batched_gemm_reduce.cpp
View file @
e00a943e
...
@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
...
@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount
\n
"
);
printf
(
"arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount
\n
"
);
printf
(
"arg15: split k into mulitiple batch
\n
"
);
printf
(
"arg15: split k into mulitiple batch
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
...
@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
...
@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
...
@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
...
@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
...
@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
profiler/src/profile_conv_bwd_data.cpp
View file @
e00a943e
...
@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
...
@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg9:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg9:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
"RightPx
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
...
@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
9
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
...
@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
...
@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
StreamControl
{
nullptr
,
time_kernel
}
,
N
,
N
,
K
,
K
,
C
,
C
,
...
@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
...
@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
StreamControl
{
nullptr
,
time_kernel
}
,
N
,
N
,
K
,
K
,
C
,
C
,
...
@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
...
@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
StreamControl
{
nullptr
,
time_kernel
}
,
N
,
N
,
K
,
K
,
C
,
C
,
...
@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
...
@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
StreamControl
{
nullptr
,
time_kernel
}
,
N
,
N
,
K
,
K
,
C
,
C
,
...
...
profiler/src/profile_conv_bwd_weight.cpp
View file @
e00a943e
...
@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
...
@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
9
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
...
@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
...
@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
N
,
N
,
K
,
K
,
C
,
C
,
...
@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
...
@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
N
,
N
,
K
,
K
,
C
,
C
,
...
...
profiler/src/profile_conv_fwd_bias_relu.cpp
View file @
e00a943e
...
@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
...
@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg9:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg9:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
"RightPx
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
...
@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
9
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
...
@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
...
@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
N
,
N
,
K
,
K
,
C
,
C
,
...
...
profiler/src/profile_conv_fwd_bias_relu_add.cpp
View file @
e00a943e
...
@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
...
@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg9:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg9:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
"RightPx
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
...
@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
9
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
...
@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
...
@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
N
,
N
,
K
,
K
,
C
,
C
,
...
...
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp
View file @
e00a943e
...
@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
...
@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg9:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg9:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
"RightPx
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
...
@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
9
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
...
@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
...
@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
N
,
N
,
K
,
K
,
C
,
C
,
...
...
profiler/src/profile_convnd_bwd_data.cpp
View file @
e00a943e
...
@@ -39,40 +39,40 @@ ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[],
...
@@ -39,40 +39,40 @@ ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[],
// (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
// (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
ck
::
utils
::
conv
::
ConvParams
params
;
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
num_dim_spatial
;
params
.
num_dim_spatial
_
=
num_dim_spatial
;
params
.
N
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
N
_
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
K
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
K
_
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
C
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
C
_
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
filter_spatial_lengths
.
resize
(
num_dim_spatial
);
params
.
filter_spatial_lengths
_
.
resize
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
{
params
.
filter_spatial_lengths
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
filter_spatial_lengths
_
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
}
}
params
.
input_spatial_lengths
.
resize
(
num_dim_spatial
);
params
.
input_spatial_lengths
_
.
resize
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
{
params
.
input_spatial_lengths
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
input_spatial_lengths
_
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
}
}
params
.
conv_filter_strides
.
resize
(
num_dim_spatial
);
params
.
conv_filter_strides
_
.
resize
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
{
params
.
conv_filter_strides
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
conv_filter_strides
_
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
}
}
params
.
conv_filter_dilations
.
resize
(
num_dim_spatial
);
params
.
conv_filter_dilations
_
.
resize
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
{
params
.
conv_filter_dilations
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
conv_filter_dilations
_
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
}
}
params
.
input_left_pads
.
resize
(
num_dim_spatial
);
params
.
input_left_pads
_
.
resize
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
{
params
.
input_left_pads
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
input_left_pads
_
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
}
}
params
.
input_right_pads
.
resize
(
num_dim_spatial
);
params
.
input_right_pads
_
.
resize
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
{
params
.
input_right_pads
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
params
.
input_right_pads
_
[
i
]
=
std
::
stoi
(
argv
[
arg_idx
++
]);
}
}
return
params
;
return
params
;
...
@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
...
@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg9:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg9:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
"RightPx
\n
"
);
return
1
;
return
1
;
...
@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
...
@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
9
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
ck
::
utils
::
conv
::
ConvParams
params
=
parse_conv_params
(
num_dim_spatial
,
argv
,
preParams
);
ck
::
utils
::
conv
::
ConvParams
params
=
parse_conv_params
(
num_dim_spatial
,
argv
,
preParams
);
...
@@ -132,17 +132,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
...
@@ -132,17 +132,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
params
.
N
,
params
.
N
_
,
params
.
K
,
params
.
K
_
,
params
.
C
,
params
.
C
_
,
params
.
input_spatial_lengths
,
params
.
input_spatial_lengths
_
,
params
.
filter_spatial_lengths
,
params
.
filter_spatial_lengths
_
,
params
.
GetOutputSpatialLengths
(),
params
.
GetOutputSpatialLengths
(),
params
.
conv_filter_strides
,
params
.
conv_filter_strides
_
,
params
.
conv_filter_dilations
,
params
.
conv_filter_dilations
_
,
params
.
input_left_pads
,
params
.
input_left_pads
_
,
params
.
input_right_pads
);
params
.
input_right_pads
_
);
break
;
break
;
case
2
:
case
2
:
...
@@ -157,17 +157,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
...
@@ -157,17 +157,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
params
.
N
,
params
.
N
_
,
params
.
K
,
params
.
K
_
,
params
.
C
,
params
.
C
_
,
params
.
input_spatial_lengths
,
params
.
input_spatial_lengths
_
,
params
.
filter_spatial_lengths
,
params
.
filter_spatial_lengths
_
,
params
.
GetOutputSpatialLengths
(),
params
.
GetOutputSpatialLengths
(),
params
.
conv_filter_strides
,
params
.
conv_filter_strides
_
,
params
.
conv_filter_dilations
,
params
.
conv_filter_dilations
_
,
params
.
input_left_pads
,
params
.
input_left_pads
_
,
params
.
input_right_pads
);
params
.
input_right_pads
_
);
break
;
break
;
case
3
:
case
3
:
...
@@ -182,17 +182,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
...
@@ -182,17 +182,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
params
.
N
,
params
.
N
_
,
params
.
K
,
params
.
K
_
,
params
.
C
,
params
.
C
_
,
params
.
input_spatial_lengths
,
params
.
input_spatial_lengths
_
,
params
.
filter_spatial_lengths
,
params
.
filter_spatial_lengths
_
,
params
.
GetOutputSpatialLengths
(),
params
.
GetOutputSpatialLengths
(),
params
.
conv_filter_strides
,
params
.
conv_filter_strides
_
,
params
.
conv_filter_dilations
,
params
.
conv_filter_dilations
_
,
params
.
input_left_pads
,
params
.
input_left_pads
_
,
params
.
input_right_pads
);
params
.
input_right_pads
_
);
break
;
break
;
default:
break
;
default:
break
;
...
...
profiler/src/profile_convnd_fwd.cpp
View file @
e00a943e
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
#include <vector>
#include <vector>
#include <half.hpp>
#include <half.hpp>
#include "conv_
fwd_
util.hpp"
#include "conv_util.hpp"
#include "element_wise_operation.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "fill.hpp"
#include "profile_convnd_fwd.hpp"
#include "profile_convnd_fwd.hpp"
...
@@ -119,7 +119,7 @@ template <int NDim,
...
@@ -119,7 +119,7 @@ template <int NDim,
void
profile_convnd_instances_impl
(
const
ck
::
utils
::
conv
::
ConvParams
&
params
,
void
profile_convnd_instances_impl
(
const
ck
::
utils
::
conv
::
ConvParams
&
params
,
bool
do_verification
,
bool
do_verification
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
int
init_method
,
int
init_method
,
ConvLayouts
)
ConvLayouts
)
{
{
...
@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
...
@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
reference_conv_fwd_fun
);
reference_conv_fwd_fun
);
auto
best_conf
=
run_engine
.
Profile
(
auto
best_conf
=
run_engine
.
Profile
(
conv
::
ConvolutionFwdInstances
<
InDataType
,
WeiDataType
,
OutDataType
>::
template
Get
<
NDim
>(),
conv
::
ConvolutionFwdInstances
<
InDataType
,
WeiDataType
,
OutDataType
>::
template
Get
<
NDim
>(),
nrepeat
,
time_kernel
,
do_verification
,
do_verification
,
do_log
);
do_log
);
...
@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type,
const
ck
::
utils
::
conv
::
ConvParams
&
params
,
const
ck
::
utils
::
conv
::
ConvParams
&
params
,
bool
do_verification
,
bool
do_verification
,
bool
do_log
,
bool
do_log
,
int
nrepeat
,
bool
time_kernel
,
int
init_method
)
int
init_method
)
{
{
switch
(
data_layout
)
switch
(
data_layout
)
...
@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
break
;
break
;
...
@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
break
;
break
;
...
@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
break
;
break
;
...
@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NHWC
>
{});
break
;
break
;
...
@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
break
;
break
;
...
@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
break
;
break
;
...
@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
break
;
break
;
...
@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type,
...
@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type,
params
,
params
,
do_verification
,
do_verification
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
init_method
,
init_method
,
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
ConvolutionLayouts
<
NDim
,
ConvDataLayout
::
NCHW
>
{});
break
;
break
;
...
@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
...
@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
bool
do_verification
{
true
};
bool
do_verification
{
true
};
int
init_method
{
2
};
int
init_method
{
2
};
bool
do_log
{
false
};
bool
do_log
{
false
};
int
nrepeat
{
100
};
bool
time_kernel
{
false
};
int
num_dim_spatial
{
2
};
int
num_dim_spatial
{
2
};
ConvParams
params
;
ConvParams
params
;
...
@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
...
@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
do_verification
=
std
::
stoi
(
argv
[
4
]);
do_verification
=
std
::
stoi
(
argv
[
4
]);
init_method
=
std
::
stoi
(
argv
[
5
]);
init_method
=
std
::
stoi
(
argv
[
5
]);
do_log
=
std
::
stoi
(
argv
[
6
]);
do_log
=
std
::
stoi
(
argv
[
6
]);
nrepeat
=
std
::
stoi
(
argv
[
7
]);
time_kernel
=
std
::
stoi
(
argv
[
7
]);
num_dim_spatial
=
std
::
stoi
(
argv
[
8
]);
num_dim_spatial
=
std
::
stoi
(
argv
[
8
]);
}
}
if
(
argc
>=
10
)
if
(
argc
>=
10
)
...
@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
...
@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
{
{
case
1
:
case
1
:
profile_convnd_instances
<
1
>
(
profile_convnd_instances
<
1
>
(
data_type
,
data_layout
,
params
,
do_verification
,
do_log
,
nrepeat
,
init_method
);
data_type
,
data_layout
,
params
,
do_verification
,
do_log
,
time_kernel
,
init_method
);
break
;
break
;
case
2
:
case
2
:
profile_convnd_instances
<
2
>
(
profile_convnd_instances
<
2
>
(
data_type
,
data_layout
,
params
,
do_verification
,
do_log
,
nrepeat
,
init_method
);
data_type
,
data_layout
,
params
,
do_verification
,
do_log
,
time_kernel
,
init_method
);
break
;
break
;
case
3
:
case
3
:
profile_convnd_instances
<
3
>
(
profile_convnd_instances
<
3
>
(
data_type
,
data_layout
,
params
,
do_verification
,
do_log
,
nrepeat
,
init_method
);
data_type
,
data_layout
,
params
,
do_verification
,
do_log
,
time_kernel
,
init_method
);
break
;
break
;
default:
default:
throw
std
::
runtime_error
(
"profile_conv_fwd: unsupported num_dim_spatial value: "
+
throw
std
::
runtime_error
(
"profile_conv_fwd: unsupported num_dim_spatial value: "
+
...
...
profiler/src/profile_gemm.cpp
View file @
e00a943e
...
@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[])
...
@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[])
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg14: split k into mulitiple batch
\n
"
);
printf
(
"arg14: split k into mulitiple batch
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[])
...
@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
profiler/src/profile_gemm_bias_2d.cpp
View file @
e00a943e
...
@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[])
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg14: alpha
\n
"
);
printf
(
"arg14: alpha
\n
"
);
printf
(
"arg15: beta
\n
"
);
printf
(
"arg15: beta
\n
"
);
...
@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
...
@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
profiler/src/profile_gemm_bias_relu.cpp
View file @
e00a943e
...
@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[])
...
@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[])
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg14: split k into mulitiple batch
\n
"
);
printf
(
"arg14: split k into mulitiple batch
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
...
@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
...
@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
...
@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
...
@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
...
@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
profiler/src/profile_gemm_bias_relu_add.cpp
View file @
e00a943e
...
@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
...
@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1
\n
"
);
printf
(
"arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1
\n
"
);
printf
(
"arg15: split k into mulitiple batch
\n
"
);
printf
(
"arg15: split k into mulitiple batch
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
...
@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
...
@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
...
@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
...
@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
...
@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
profiler/src/profile_gemm_reduce.cpp
View file @
e00a943e
...
@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[])
...
@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[])
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
" 3: A[k, m] * B[n, k] = C[m, n])
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg4: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg5: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg
8
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg
6
: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg7:
run
kernel
# of times (>1
)
\n
"
);
printf
(
"arg7:
time
kernel
(0=n0, 1=yes
)
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg8 to 13: M, N, K, StrideA, StrideB, StrideC
\n
"
);
printf
(
"arg14: split k into mulitiple batch
\n
"
);
printf
(
"arg14: split k into mulitiple batch
\n
"
);
exit
(
1
);
exit
(
1
);
...
@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[])
...
@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[])
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
bool
do_verification
=
std
::
stoi
(
argv
[
4
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
int
init_method
=
std
::
stoi
(
argv
[
5
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
6
]);
const
int
nrepeat
=
std
::
stoi
(
argv
[
7
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
7
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
M
=
std
::
stoi
(
argv
[
8
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
const
int
N
=
std
::
stoi
(
argv
[
9
]);
...
@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[])
...
@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[])
...
@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[])
...
@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[])
...
@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[])
do_verification
,
do_verification
,
init_method
,
init_method
,
do_log
,
do_log
,
nrepeat
,
time_kernel
,
M
,
M
,
N
,
N
,
K
,
K
,
...
...
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment