gaoqiong / composable_kernel / Commits / b134b7d6

Commit b134b7d6 authored May 16, 2022 by carlushuang

Merge remote-tracking branch 'origin/develop' into cpu_avx2

parents 090ba885 9f71ff48

Changes 211
Showing 20 changed files with 286 additions and 243 deletions (+286 -243)
profiler/include/profile_gemm_reduce_impl.hpp             +32  -46
profiler/include/profile_grouped_gemm_impl.hpp             +14  -13
profiler/include/profile_reduce_impl.hpp                   +9   -6
profiler/src/profile_batched_gemm.cpp                      +19  -19
profiler/src/profile_batched_gemm_reduce.cpp               +7   -7
profiler/src/profile_conv_bwd_data.cpp                     +6   -6
profiler/src/profile_conv_bwd_weight.cpp                   +3   -3
profiler/src/profile_conv_fwd_bias_relu.cpp                +3   -3
profiler/src/profile_conv_fwd_bias_relu_add.cpp            +3   -3
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp     +3   -3
profiler/src/profile_convnd_bwd_data.cpp                   +48  -48
profiler/src/profile_convnd_fwd.cpp                        +17  -17
profiler/src/profile_gemm.cpp                              +19  -19
profiler/src/profile_gemm_bias_2d.cpp                      +11  -11
profiler/src/profile_gemm_bias_relu.cpp                    +7   -7
profiler/src/profile_gemm_bias_relu_add.cpp                +7   -7
profiler/src/profile_gemm_reduce.cpp                       +7   -7
profiler/src/profile_grouped_gemm.cpp                      +7   -7
profiler/src/profile_reduce.cpp                            +11  -11
script/parse_perf_data.py                                  +53  -0
profiler/include/profile_gemm_reduce_impl.hpp

@@ -7,7 +7,7 @@
 #include "tensor_layout.hpp"
 #include "device_tensor.hpp"
 #include "element_wise_operation.hpp"
-#include "element_wise_reduce_operation.hpp"
+#include "reduction_operator.hpp"
 #include "device_gemm_reduce.hpp"
 #include "reference_gemm.hpp"
@@ -20,8 +20,7 @@ using DeviceGemmReduceNoOpPtr = ck::tensor_operation::device::DeviceGemmReducePt
     ck::tensor_operation::element_wise::PassThrough,
     ck::tensor_operation::element_wise::PassThrough,
     ck::tensor_operation::element_wise::PassThrough,
-    ck::tensor_operation::element_wise::ReduceSum,
-    ck::tensor_operation::element_wise::ReduceSquareSum>;
+    ck::tensor_operation::element_wise::UnarySquare<float, float, false>>;

 void add_device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instances(
     std::vector<DeviceGemmReduceNoOpPtr>&);
@@ -53,7 +52,7 @@ template <typename ADataType,
 bool profile_gemm_reduce_impl(int do_verification,
                               int init_method,
                               bool do_log,
-                              int nrepeat,
+                              bool time_kernel,
                               int M,
                               int N,
                               int K,
@@ -113,17 +112,19 @@ bool profile_gemm_reduce_impl(int do_verification,
         b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5}, num_thread);
     }

-    using AElementOp = ck::tensor_operation::element_wise::PassThrough;
-    using BElementOp = ck::tensor_operation::element_wise::PassThrough;
-    using CElementOp = ck::tensor_operation::element_wise::PassThrough;
-    using D0ReduceOp = ck::tensor_operation::element_wise::ReduceSum;
-    using D1ReduceOp = ck::tensor_operation::element_wise::ReduceSquareSum;
+    using AElementOp  = ck::tensor_operation::element_wise::PassThrough;
+    using BElementOp  = ck::tensor_operation::element_wise::PassThrough;
+    using CElementOp  = ck::tensor_operation::element_wise::PassThrough;
+    using D0ReduceOp  = ck::reduce::Add<float>;
+    using D1ReduceOp  = ck::reduce::Add<float>;
+    using D1ElementOp = ck::tensor_operation::element_wise::UnarySquare<float, float, false>;

-    const auto a_element_op = AElementOp{};
-    const auto b_element_op = BElementOp{};
-    const auto c_element_op = CElementOp{};
-    const auto d0_reduce_op = D0ReduceOp{};
-    const auto d1_reduce_op = D1ReduceOp{};
+    const auto a_element_op  = AElementOp{};
+    const auto b_element_op  = BElementOp{};
+    const auto c_element_op  = CElementOp{};
+    const auto d0_reduce_op  = D0ReduceOp{};
+    const auto d1_reduce_op  = D1ReduceOp{};
+    const auto d1_element_op = D1ElementOp{};

     if(do_verification)
     {
@@ -140,17 +141,21 @@ bool profile_gemm_reduce_impl(int do_verification,
         for(int m = 0; m < M; ++m)
         {
-            float d0_acc = d0_reduce_op.GetReduceZeroValue();
-            float d1_acc = d1_reduce_op.GetReduceZeroValue();
+            float d0_acc = d0_reduce_op.GetReductionZeroVal();
+            float d1_acc = d1_reduce_op.GetReductionZeroVal();

             for(int n = 0; n < N; ++n)
             {
-                d0_reduce_op.Reduce(d0_acc, c_m_n_host_result(m, n));
-                d1_reduce_op.Reduce(d1_acc, c_m_n_host_result(m, n));
+                float d0_val = ck::type_convert<float>(c_m_n_host_result(m, n));
+                float d1_val;
+
+                d1_element_op(d1_val, d0_val);
+                d0_reduce_op(d0_acc, d0_val);
+                d1_reduce_op(d1_acc, d1_val);
             }

-            d0_m_host_result(m) = d0_acc;
-            d1_m_host_result(m) = d1_acc;
+            d0_m_host_result(m) = ck::type_convert<DDataType>(d0_acc);
+            d1_m_host_result(m) = ck::type_convert<DDataType>(d1_acc);
         }
     }
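The host-reference loop above now derives both outputs from one pass over the GEMM result: each value is squared by the unary element-op before being fed to a plain Add reduction, so D0 accumulates a row sum and D1 a row sum-of-squares. Below is a minimal standalone sketch of that pattern; the functor names mirror the diff, but their bodies here are illustrative assumptions rather than the real CK implementations.

    #include <cstdio>
    #include <vector>

    // Stand-ins for ck::reduce::Add<float> and element_wise::UnarySquare
    // (names follow the diff; these bodies are assumptions for illustration).
    struct AddReduce
    {
        float GetReductionZeroVal() const { return 0.f; }
        void operator()(float& acc, float v) const { acc += v; }
    };

    struct UnarySquare
    {
        void operator()(float& y, float x) const { y = x * x; }
    };

    int main()
    {
        const int M = 2, N = 4;
        const std::vector<float> c = {1, 2, 3, 4,   // row 0
                                      -1, 0, 2, 5}; // row 1

        AddReduce d0_reduce_op, d1_reduce_op;
        UnarySquare d1_element_op;

        for(int m = 0; m < M; ++m)
        {
            float d0_acc = d0_reduce_op.GetReductionZeroVal(); // row sum
            float d1_acc = d1_reduce_op.GetReductionZeroVal(); // row sum of squares

            for(int n = 0; n < N; ++n)
            {
                float d0_val = c[m * N + n];
                float d1_val;
                d1_element_op(d1_val, d0_val); // square first ...
                d0_reduce_op(d0_acc, d0_val);  // ... then reduce both with the same Add op
                d1_reduce_op(d1_acc, d1_val);
            }
            std::printf("row %d: sum = %g, sum_sq = %g\n", m, d0_acc, d1_acc);
        }
        return 0;
    }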
@@ -232,43 +237,24 @@ bool profile_gemm_reduce_impl(int do_verification,
                                                       a_element_op,
                                                       b_element_op,
                                                       c_element_op,
                                                       d0_reduce_op,
-                                                      d1_reduce_op);
+                                                      d1_element_op);

         auto invoker_ptr = gemm_ptr->MakeInvokerPointer();

         if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
         {
-            // warm up
-            invoker_ptr->Run(argument_ptr.get());
-
-            // timing
-            float total_time = 0;
-
-            for(int i = 0; i < nrepeat; ++i)
-            {
-                // init DO, D1 to 0
-                d0_device_buf.SetZero();
-                d1_device_buf.SetZero();
-
-                KernelTimer timer;
-                timer.Start();
-
-                invoker_ptr->Run(argument_ptr.get());
-
-                timer.End();
-
-                total_time += timer.GetElapsedTime();
-            }
+            // init DO, D1 to 0
+            d0_device_buf.SetZero();
+            d1_device_buf.SetZero();

-            float ave_time = total_time / nrepeat;
+            float ave_time =
+                invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});

             std::string gemm_name = gemm_ptr->GetTypeString();

             std::size_t flop = std::size_t(2) * M * N * K;
-            std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * M +
+            std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * N +
                                     sizeof(CDataType) * M * N + sizeof(CDataType) * N;

             float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
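The hand-rolled warm-up/KernelTimer/nrepeat loop is gone: Run now takes a StreamConfig and itself returns the averaged kernel time when time_kernel is set. A rough sketch of how the returned milliseconds feed the reported numbers, using the 2*M*N*K flop count and byte formula from the hunk above; the helper itself and the GB/s conversion are assumptions, not code from the diff.

    #include <cstddef>
    #include <cstdio>

    // Hypothetical helper mirroring the arithmetic in profile_gemm_reduce_impl.hpp.
    // ave_time_ms stands for what invoker_ptr->Run(arg, StreamConfig{nullptr, time_kernel})
    // would return for a timed run.
    void report_gemm_reduce_perf(std::size_t M, std::size_t N, std::size_t K,
                                 std::size_t a_bytes, std::size_t b_bytes,
                                 std::size_t c_bytes, float ave_time_ms)
    {
        std::size_t flop      = std::size_t(2) * M * N * K;
        std::size_t num_btype = a_bytes * M * K + b_bytes * K * N + // note the fixed K*N term
                                c_bytes * M * N + c_bytes * N;

        float tflops     = static_cast<float>(flop) / 1.E9f / ave_time_ms; // ms -> TFLOP/s
        float gb_per_sec = num_btype / 1.E6f / ave_time_ms;                // ms -> GB/s (assumed)

        std::printf("%.3f ms, %.3f TFlops, %.3f GB/s\n", ave_time_ms, tflops, gb_per_sec);
    }

    int main()
    {
        // fp16 A/B/C with a made-up 1.2 ms average kernel time
        report_gemm_reduce_perf(3840, 4096, 4096, 2, 2, 2, 1.2f);
        return 0;
    }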
profiler/include/profile_grouped_gemm_impl.hpp

@@ -49,13 +49,13 @@ template <typename ADataType,
 void profile_grouped_gemm_impl(int do_verification,
                                int init_method,
                                bool do_log,
-                               int nrepeat,
-                               std::vector<int> Ms,
-                               std::vector<int> Ns,
-                               std::vector<int> Ks,
-                               std::vector<int> StrideAs,
-                               std::vector<int> StrideBs,
-                               std::vector<int> StrideCs)
+                               bool time_kernel,
+                               const std::vector<int>& Ms,
+                               const std::vector<int>& Ns,
+                               const std::vector<int>& Ks,
+                               const std::vector<int>& StrideAs,
+                               const std::vector<int>& StrideBs,
+                               const std::vector<int>& StrideCs)
 {
     auto f_host_tensor_descriptor =
         [](std::size_t row, std::size_t col, std::size_t stride, auto layout) {

@@ -71,7 +71,7 @@ void profile_grouped_gemm_impl(int do_verification,
         }
     };

-    int group_count = Ms.size();
+    std::size_t group_count = Ms.size();

     if(!(group_count == Ns.size() && group_count == Ks.size() && group_count == StrideAs.size() &&
          group_count == StrideBs.size() && group_count == StrideCs.size()))

@@ -83,7 +83,7 @@ void profile_grouped_gemm_impl(int do_verification,
     std::vector<Tensor<BDataType>> b_k_n;
     std::vector<Tensor<CDataType>> c_m_n_device_results;

-    for(int i = 0; i < Ms.size(); i++)
+    for(std::size_t i = 0; i < group_count; i++)
     {
         a_m_k.push_back(Tensor<ADataType>(f_host_tensor_descriptor(Ms[i], Ks[i], StrideAs[i], ALayout{})));

@@ -144,7 +144,7 @@ void profile_grouped_gemm_impl(int do_verification,
     gemm_shapes.reserve(group_count);

-    for(int i = 0; i < group_count; i++)
+    for(std::size_t i = 0; i < group_count; i++)
     {
         a_device_buf.emplace_back(std::make_unique<DeviceMem>(sizeof(ADataType) * a_m_k[i].mDesc.GetElementSpace()));

@@ -231,10 +231,11 @@ void profile_grouped_gemm_impl(int do_verification,
     {
         std::string gemm_name = gemm_ptr->GetTypeString();

-        float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
+        float ave_time =
+            invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});

         std::size_t flop = 0, num_btype = 0;
-        for(int i = 0; i < gemm_shapes.size(); i++)
+        for(std::size_t i = 0; i < gemm_shapes.size(); i++)
         {
             flop += std::size_t(2) * Ms[i] * Ns[i] * Ks[i];

@@ -258,7 +259,7 @@ void profile_grouped_gemm_impl(int do_verification,
     if(do_verification)
     {
-        for(int i = 0; i < gemm_shapes.size(); i++)
+        for(std::size_t i = 0; i < gemm_shapes.size(); i++)
         {
             c_device_buf[i]->FromDevice(c_m_n_device_results[i].mData.data());
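For grouped GEMM the same accounting is summed over every group before being divided by the single averaged time returned from Run(..., StreamConfig{nullptr, time_kernel}). A small sketch of that accumulation: the flop term matches the hunk above, while the per-group byte term and the timing value are illustrative assumptions.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Per-group problem sizes, e.g. parsed from "256,256 128,128 64,64 ..."
        std::vector<int> Ms{256, 256}, Ns{128, 128}, Ks{64, 64};

        std::size_t flop = 0, num_btype = 0;
        const std::size_t elem_bytes = 2; // fp16 assumption

        for(std::size_t i = 0; i < Ms.size(); i++)
        {
            flop += std::size_t(2) * Ms[i] * Ns[i] * Ks[i];
            // bytes moved for A, B and C of group i (illustrative, not from the diff)
            num_btype += elem_bytes * (std::size_t(Ms[i]) * Ks[i] +
                                       std::size_t(Ks[i]) * Ns[i] +
                                       std::size_t(Ms[i]) * Ns[i]);
        }

        const float ave_time_ms = 0.05f; // what a timed Run would report
        std::printf("%.3f TFlops, %.3f GB/s\n",
                    flop / 1.E9f / ave_time_ms, num_btype / 1.E6f / ave_time_ms);
        return 0;
    }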
profiler/include/profile_reduce_impl.hpp

@@ -157,7 +157,7 @@ void profile_reduce_impl_impl(bool do_verification,
                               int init_method,
                               bool do_log,
                               bool do_dumpout,
-                              int nrepeat,
+                              bool time_kernel,
                               const std::vector<size_t>& inLengths,
                               const std::vector<int>& reduceDims,
                               float alpha,

@@ -430,7 +430,8 @@ void profile_reduce_impl_impl(bool do_verification,
            auto invoker_ptr = reduce_ptr->MakeInvokerPointer();

-           float avg_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
+           float avg_time =
+               invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});

            std::size_t num_bytes = invariant_total_length * reduce_total_length * sizeof(InDataType) +

@@ -516,7 +517,8 @@ void profile_reduce_impl_impl(bool do_verification,
            auto invoker_ptr = reduce_ptr->MakeInvokerPointer();

-           float avg_time = invoker_ptr->Run(argument_ptr.get(), nrepeat);
+           float avg_time =
+               invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});

            std::size_t num_bytes = invariant_total_length * reduce_total_length * sizeof(InDataType) +

@@ -554,7 +556,8 @@ void profile_reduce_impl_impl(bool do_verification,
                auto invoker2_ptr = reduce2_ptr->MakeInvokerPointer();

-               float avg_time_2 = invoker2_ptr->Run(argument2_ptr.get(), nrepeat);
+               float avg_time_2 =
+                   invoker2_ptr->Run(argument2_ptr.get(), StreamConfig{nullptr, time_kernel});

               std::size_t num_bytes_2 =
                   static_cast<size_t>(inLengths2[0]) * inLengths2[1] * sizeof(AccDataType);

@@ -625,7 +628,7 @@ void profile_reduce_impl(bool do_verification,
                         int init_method,
                         bool do_log,
                         bool do_dumpout,
-                        int nrepeat,
+                        bool time_kernel,
                         const std::vector<size_t>& inLengths,
                         const std::vector<int>& reduceDims,
                         ReduceTensorOp ReduceOpId,

@@ -663,7 +666,7 @@ void profile_reduce_impl(bool do_verification,
                       init_method,
                       do_log,
                       do_dumpout,
-                      nrepeat,
+                      time_kernel,
                       inLengths,
                       reduceDims,
                       alpha,
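Reductions are bandwidth-bound, so this profiler reports bytes moved rather than FLOPs; the hunks above truncate the expression, but the visible part counts the whole input tensor (invariant_total_length * reduce_total_length elements). A hedged sketch of that effective-bandwidth calculation follows; the output-traffic term and the GB/s conversion are assumptions that go beyond what the hunk shows.

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        using InDataType  = float; // assumed element types for the sketch
        using OutDataType = float;

        std::size_t invariant_total_length = 64 * 320; // lengths kept after reduction
        std::size_t reduce_total_length    = 80 * 4;   // lengths being reduced away

        // Visible part of the diff: the whole input tensor is read once ...
        std::size_t num_bytes = invariant_total_length * reduce_total_length * sizeof(InDataType) +
                                // ... assumed continuation: one output element per invariant index
                                invariant_total_length * sizeof(OutDataType);

        float avg_time_ms = 0.12f; // what Run(..., StreamConfig{nullptr, true}) would return
        std::printf("effective bandwidth: %.2f GB/s\n", num_bytes / 1.E6f / avg_time_ms);
        return 0;
    }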
profiler/src/profile_batched_gemm.cpp

@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[])
        printf(" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
        exit(1);
    }

@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[])
@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[])
(each of the 16 profiler call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
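argv[7] keeps its position across all of these profilers but changes meaning: it is now read into a bool, so any non-zero value simply turns kernel timing on instead of setting a repeat count. A tiny sketch of that conversion, using the same std::stoi idiom as the diff (the main function and default value here are hypothetical):

    #include <cstdio>
    #include <string>

    int main(int argc, char* argv[])
    {
        // e.g. the profiler is launched with argv[7] == "1"
        const char* arg7 = (argc > 7) ? argv[7] : "1";

        const bool time_kernel = std::stoi(arg7); // 0 -> false, anything else -> true

        std::printf("time_kernel = %s\n", time_kernel ? "yes" : "no");
        return 0;
    }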
profiler/src/profile_batched_gemm_reduce.cpp

@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
        printf("arg15: split k into mulitiple batch\n");
        exit(1);

@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
(each of the four call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
profiler/src/profile_conv_bwd_data.cpp

@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg9: run kernel # of times (>1)\n");
+       printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        exit(1);

@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[6]);
    const int init_method = std::stoi(argv[7]);
    const bool do_log = std::stoi(argv[8]);
-   const int nrepeat = std::stoi(argv[9]);
+   const bool time_kernel = std::stoi(argv[9]);

    const ck::index_t N = std::stoi(argv[10]);
    const ck::index_t K = std::stoi(argv[11]);

@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
(each of the four call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           StreamControl{nullptr, time_kernel},
            N,
            K,
            C,
profiler/src/profile_conv_bwd_weight.cpp

@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[6]);
    const int init_method = std::stoi(argv[7]);
    const bool do_log = std::stoi(argv[8]);
-   const int nrepeat = std::stoi(argv[9]);
+   const bool time_kernel = std::stoi(argv[9]);

    const ck::index_t N = std::stoi(argv[10]);
    const ck::index_t K = std::stoi(argv[11]);

@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
(both call sites change the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            N,
            K,
            C,
profiler/src/profile_conv_fwd_bias_relu.cpp

@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg9: run kernel # of times (>1)\n");
+       printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        exit(1);

@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[6]);
    const int init_method = std::stoi(argv[7]);
    const bool do_log = std::stoi(argv[8]);
-   const int nrepeat = std::stoi(argv[9]);
+   const bool time_kernel = std::stoi(argv[9]);

    const ck::index_t N = std::stoi(argv[10]);
    const ck::index_t K = std::stoi(argv[11]);

@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            N,
            K,
            C,
profiler/src/profile_conv_fwd_bias_relu_add.cpp

@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg9: run kernel # of times (>1)\n");
+       printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        exit(1);

@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[6]);
    const int init_method = std::stoi(argv[7]);
    const bool do_log = std::stoi(argv[8]);
-   const int nrepeat = std::stoi(argv[9]);
+   const bool time_kernel = std::stoi(argv[9]);

    const ck::index_t N = std::stoi(argv[10]);
    const ck::index_t K = std::stoi(argv[11]);

@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            N,
            K,
            C,
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp

@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg9: run kernel # of times (>1)\n");
+       printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        exit(1);

@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[6]);
    const int init_method = std::stoi(argv[7]);
    const bool do_log = std::stoi(argv[8]);
-   const int nrepeat = std::stoi(argv[9]);
+   const bool time_kernel = std::stoi(argv[9]);

    const ck::index_t N = std::stoi(argv[10]);
    const ck::index_t K = std::stoi(argv[11]);

@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            N,
            K,
            C,
profiler/src/profile_convnd_bwd_data.cpp

@@ -39,40 +39,40 @@ ck::utils::conv::ConvParams parse_conv_params(int num_dim_spatial, char* argv[],
     // (N, K, C) + num_dim_spatial * 6 (filter, input, strides, dilations, pad left, pad right)
     ck::utils::conv::ConvParams params;
-    params.num_dim_spatial = num_dim_spatial;
-    params.N = std::stoi(argv[arg_idx++]);
-    params.K = std::stoi(argv[arg_idx++]);
-    params.C = std::stoi(argv[arg_idx++]);
+    params.num_dim_spatial_ = num_dim_spatial;
+    params.N_ = std::stoi(argv[arg_idx++]);
+    params.K_ = std::stoi(argv[arg_idx++]);
+    params.C_ = std::stoi(argv[arg_idx++]);

-    params.filter_spatial_lengths.resize(num_dim_spatial);
+    params.filter_spatial_lengths_.resize(num_dim_spatial);
     for(int i = 0; i < num_dim_spatial; ++i)
     {
-        params.filter_spatial_lengths[i] = std::stoi(argv[arg_idx++]);
+        params.filter_spatial_lengths_[i] = std::stoi(argv[arg_idx++]);
     }
-    params.input_spatial_lengths.resize(num_dim_spatial);
+    params.input_spatial_lengths_.resize(num_dim_spatial);
     for(int i = 0; i < num_dim_spatial; ++i)
     {
-        params.input_spatial_lengths[i] = std::stoi(argv[arg_idx++]);
+        params.input_spatial_lengths_[i] = std::stoi(argv[arg_idx++]);
     }
-    params.conv_filter_strides.resize(num_dim_spatial);
+    params.conv_filter_strides_.resize(num_dim_spatial);
     for(int i = 0; i < num_dim_spatial; ++i)
     {
-        params.conv_filter_strides[i] = std::stoi(argv[arg_idx++]);
+        params.conv_filter_strides_[i] = std::stoi(argv[arg_idx++]);
     }
-    params.conv_filter_dilations.resize(num_dim_spatial);
+    params.conv_filter_dilations_.resize(num_dim_spatial);
     for(int i = 0; i < num_dim_spatial; ++i)
     {
-        params.conv_filter_dilations[i] = std::stoi(argv[arg_idx++]);
+        params.conv_filter_dilations_[i] = std::stoi(argv[arg_idx++]);
     }
-    params.input_left_pads.resize(num_dim_spatial);
+    params.input_left_pads_.resize(num_dim_spatial);
     for(int i = 0; i < num_dim_spatial; ++i)
     {
-        params.input_left_pads[i] = std::stoi(argv[arg_idx++]);
+        params.input_left_pads_[i] = std::stoi(argv[arg_idx++]);
     }
-    params.input_right_pads.resize(num_dim_spatial);
+    params.input_right_pads_.resize(num_dim_spatial);
     for(int i = 0; i < num_dim_spatial; ++i)
     {
-        params.input_right_pads[i] = std::stoi(argv[arg_idx++]);
+        params.input_right_pads_[i] = std::stoi(argv[arg_idx++]);
     }

     return params;

@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg9: run kernel # of times (>1)\n");
+       printf("arg9: time kernel (0=n0, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        return 1;

@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
    const bool do_verification = std::stoi(argv[6]);
    const int init_method = std::stoi(argv[7]);
    const bool do_log = std::stoi(argv[8]);
-   const int nrepeat = std::stoi(argv[9]);
+   const bool time_kernel = std::stoi(argv[9]);

    ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);

@@ -132,17 +132,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
-           params.N,
-           params.K,
-           params.C,
-           params.input_spatial_lengths,
-           params.filter_spatial_lengths,
+           time_kernel,
+           params.N_,
+           params.K_,
+           params.C_,
+           params.input_spatial_lengths_,
+           params.filter_spatial_lengths_,
            params.GetOutputSpatialLengths(),
-           params.conv_filter_strides,
-           params.conv_filter_dilations,
-           params.input_left_pads,
-           params.input_right_pads);
+           params.conv_filter_strides_,
+           params.conv_filter_dilations_,
+           params.input_left_pads_,
+           params.input_right_pads_);
        break;
    case 2:

@@ -157,17 +157,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
@@ -182,17 +182,17 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
(the 2-D and 3-D dispatch cases — ending in "break; case 3:" and "break; default: break;" respectively — change identically: nrepeat becomes time_kernel and every params member gains the trailing underscore)
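Every ConvParams member referenced here picked up a trailing underscore, so parse_conv_params and all call sites switch from params.N to params.N_, with the spatial vectors renamed likewise. The stand-in below only mimics the fields named in this diff (it is not the real ck::utils::conv::ConvParams) and shows them being filled for a hypothetical 2-D convolution.

    #include <cstdio>
    #include <vector>

    // Illustrative stand-in for ck::utils::conv::ConvParams, limited to the
    // members that appear in this diff; the real class lives in CK's conv utilities.
    struct ConvParamsSketch
    {
        int num_dim_spatial_;
        int N_, K_, C_;
        std::vector<int> filter_spatial_lengths_;
        std::vector<int> input_spatial_lengths_;
        std::vector<int> conv_filter_strides_;
        std::vector<int> conv_filter_dilations_;
        std::vector<int> input_left_pads_;
        std::vector<int> input_right_pads_;
    };

    int main()
    {
        ConvParamsSketch params;
        params.num_dim_spatial_        = 2;
        params.N_                      = 128;
        params.K_                      = 256;
        params.C_                      = 192;
        params.filter_spatial_lengths_ = {3, 3};
        params.input_spatial_lengths_  = {71, 71};
        params.conv_filter_strides_    = {2, 2};
        params.conv_filter_dilations_  = {1, 1};
        params.input_left_pads_        = {1, 1};
        params.input_right_pads_       = {1, 1};

        std::printf("N=%d K=%d C=%d, %dx%d filter\n",
                    params.N_, params.K_, params.C_,
                    params.filter_spatial_lengths_[0], params.filter_spatial_lengths_[1]);
        return 0;
    }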
profiler/src/profile_convnd_fwd.cpp

@@ -5,7 +5,7 @@
 #include <vector>
 #include <half.hpp>
-#include "conv_fwd_util.hpp"
+#include "conv_util.hpp"
 #include "element_wise_operation.hpp"
 #include "fill.hpp"
 #include "profile_convnd_fwd.hpp"

@@ -119,7 +119,7 @@ template <int NDim,
 void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
                                    bool do_verification,
                                    bool do_log,
-                                   int nrepeat,
+                                   bool time_kernel,
                                    int init_method,
                                    ConvLayouts)
 {

@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
         reference_conv_fwd_fun);
     auto best_conf = run_engine.Profile(
         conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(),
-        nrepeat,
+        time_kernel,
         do_verification,
         do_log);

@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type,
                              const ck::utils::conv::ConvParams& params,
                              bool do_verification,
                              bool do_log,
-                             int nrepeat,
+                             bool time_kernel,
                              int init_method)
 {
     switch(data_layout)

@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type,
@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type,
@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type,
@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type,
(each NHWC dispatch changes the same way:)
            params,
            do_verification,
            do_log,
-           nrepeat,
+           time_kernel,
            init_method,
            ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
        break;

@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type,
@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type,
@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type,
@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type,
(and each NCHW dispatch likewise:)
            params,
            do_verification,
            do_log,
-           nrepeat,
+           time_kernel,
            init_method,
            ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
        break;

@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
    bool do_verification{true};
    int init_method{2};
    bool do_log{false};
-   int nrepeat{100};
+   bool time_kernel{false};
    int num_dim_spatial{2};

    ConvParams params;

@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
        do_verification = std::stoi(argv[4]);
        init_method = std::stoi(argv[5]);
        do_log = std::stoi(argv[6]);
-       nrepeat = std::stoi(argv[7]);
+       time_kernel = std::stoi(argv[7]);
        num_dim_spatial = std::stoi(argv[8]);
    }

    if(argc >= 10)

@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
    {
    case 1:
        profile_convnd_instances<1>(
-           data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
+           data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
        break;
    case 2:
        profile_convnd_instances<2>(
-           data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
+           data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
        break;
    case 3:
        profile_convnd_instances<3>(
-           data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
+           data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
        break;
    default:
        throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " +
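profile_convnd_fwd keeps defaults and only overrides them when enough arguments are supplied, so the new time_kernel flag defaults to false (no timing) unless argv[7] says otherwise. A compressed sketch of that default-then-override pattern; the exact argc threshold guarding the override block is outside the visible hunk, so the value used here is a guess.

    #include <cstdio>
    #include <string>

    int main(int argc, char* argv[])
    {
        // Defaults mirroring the hunk: verification on, no timing, 2 spatial dims.
        bool do_verification{true};
        int init_method{2};
        bool do_log{false};
        bool time_kernel{false};
        int num_dim_spatial{2};

        if(argc >= 9) // assumed guard: only override when the caller supplied the full set
        {
            do_verification = std::stoi(argv[4]);
            init_method     = std::stoi(argv[5]);
            do_log          = std::stoi(argv[6]);
            time_kernel     = std::stoi(argv[7]);
            num_dim_spatial = std::stoi(argv[8]);
        }

        std::printf("time_kernel=%d num_dim_spatial=%d\n", time_kernel, num_dim_spatial);
        return 0;
    }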
profiler/src/profile_gemm.cpp

@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
        printf("arg14: split k into mulitiple batch\n");
        exit(1);

@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[])
@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[])
@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[])
@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[])
@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[])
@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[])
@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[])
@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[])
@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[])
@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[])
@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[])
@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[])
@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[])
@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[])
@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[])
@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[])
(each of the 16 profiler call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
profiler/src/profile_gemm_bias_2d.cpp

@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
        printf("arg14: alpha\n");
        printf("arg15: beta\n");

@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
(each of the eight call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
profiler/src/profile_gemm_bias_relu.cpp

@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
        printf("arg14: split k into mulitiple batch\n");
        exit(1);

@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
(each of the four call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
profiler/src/profile_gemm_bias_relu_add.cpp

@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1\n");
        printf("arg15: split k into mulitiple batch\n");
        exit(1);

@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
(each of the four call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
profiler/src/profile_gemm_reduce.cpp

@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
        printf("arg14: split k into mulitiple batch\n");
        exit(1);

@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);

@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[])
@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[])
@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[])
@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[])
(each of the four call sites above changes the same way:)
            do_verification,
            init_method,
            do_log,
-           nrepeat,
+           time_kernel,
            M,
            N,
            K,
profiler/src/profile_grouped_gemm.cpp

@@ -54,8 +54,8 @@ int profile_grouped_gemm(int argc, char* argv[])
        printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
-       printf("arg8: print tensor value (0: no; 1: yes)\n");
-       printf("arg7: run kernel # of times (>1)\n");
+       printf("arg6: print tensor value (0: no; 1: yes)\n");
+       printf("arg7: time kernel (0=n0, 1=yes)\n");
        printf("arg8 to 13: Ms, Ns, Ks, StrideAs, StrideBs, StrideCs (e.g., 256,256 128,128 64,64 "
               "64,64 64,64 128,128)\n");
        exit(1);

@@ -66,7 +66,7 @@ int profile_grouped_gemm(int argc, char* argv[])
    const bool do_verification = std::stoi(argv[4]);
    const int init_method = std::stoi(argv[5]);
    const bool do_log = std::stoi(argv[6]);
-   const int nrepeat = std::stoi(argv[7]);
+   const bool time_kernel = std::stoi(argv[7]);

    const auto Ms = argToIntArray(argv[8]);
    const auto Ns = argToIntArray(argv[9]);

@@ -86,7 +86,7 @@ int profile_grouped_gemm(int argc, char* argv[])
@@ -104,7 +104,7 @@ int profile_grouped_gemm(int argc, char* argv[])
@@ -122,7 +122,7 @@ int profile_grouped_gemm(int argc, char* argv[])
@@ -140,7 +140,7 @@ int profile_grouped_gemm(int argc, char* argv[])
(each of the four call sites above changes the same way:)
                                     ck::tensor_layout::gemm::RowMajor>(do_verification,
                                                                        init_method,
                                                                        do_log,
-                                                                       nrepeat,
+                                                                       time_kernel,
                                                                        Ms,
                                                                        Ns,
                                                                        Ks,
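profile_grouped_gemm.cpp still reads each group list through argToIntArray(argv[i]); judging from the usage string ("256,256 128,128 64,64 ..."), each argument is a comma-separated list of ints. One possible implementation of such a parser is sketched below; the real argToIntArray in the profiler may differ.

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    // Assumed behaviour of argToIntArray: split "256,256" into {256, 256}.
    std::vector<int> argToIntArray(const char* arg)
    {
        std::vector<int> out;
        std::stringstream ss(arg);
        std::string token;
        while(std::getline(ss, token, ','))
            out.push_back(std::stoi(token));
        return out;
    }

    int main()
    {
        const auto Ms = argToIntArray("256,256");
        const auto Ns = argToIntArray("128,128");
        const auto Ks = argToIntArray("64,64");

        for(std::size_t i = 0; i < Ms.size(); i++)
            std::printf("group %zu: %d x %d x %d\n", i, Ms[i], Ns[i], Ks[i]);
        return 0;
    }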
profiler/src/profile_reduce.cpp

@@ -144,7 +144,7 @@ class AppArgs
    bool do_dumpout = false;

    int init_method;
-   int nrepeat;
+   bool time_kernel;

    bool need_indices = false;

@@ -186,7 +186,7 @@ class AppArgs
    int processArgs(int argc, char* argv[])
    {
-       unsigned int ch;
+       int ch;

        optind++; // to skip the "reduce" module name

@@ -295,7 +295,7 @@ class AppArgs
            throw std::runtime_error("Invalid cmd-line arguments, more argumetns are needed!");

        init_method = std::atoi(argv[optind++]);
-       nrepeat = std::atoi(argv[optind]);
+       time_kernel = std::atoi(argv[optind]);

        if(scales.empty())
        {

@@ -354,7 +354,7 @@ int profile_reduce(int argc, char* argv[])
@@ -369,7 +369,7 @@ int profile_reduce(int argc, char* argv[])
@@ -387,7 +387,7 @@ int profile_reduce(int argc, char* argv[])
@@ -414,7 +414,7 @@ int profile_reduce(int argc, char* argv[])
@@ -429,7 +429,7 @@ int profile_reduce(int argc, char* argv[])
@@ -454,7 +454,7 @@ int profile_reduce(int argc, char* argv[])
@@ -471,7 +471,7 @@ int profile_reduce(int argc, char* argv[])
@@ -486,7 +486,7 @@ int profile_reduce(int argc, char* argv[])
(each of the eight call sites above forwards the parsed arguments and changes the same way:)
                          args.init_method,
                          args.do_log,
                          args.do_dumpout,
-                         args.nrepeat,
+                         args.time_kernel,
                          args.inLengths,
                          args.reduceDims,
                          args.reduceOp,
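The ch variable in processArgs changes from unsigned int to int: getopt returns an int and signals the end of the options with -1, so keeping ch signed avoids relying on unsigned wrap-around in the != -1 comparison. A minimal option loop in that corrected style is sketched below; the flags are hypothetical, not the profiler's real ones.

    #include <cstdio>
    #include <unistd.h> // getopt, optind, optarg

    int main(int argc, char* argv[])
    {
        int ch; // int, not unsigned: getopt returns -1 when the options run out

        // Hypothetical flags, just to exercise the loop shape used in processArgs.
        while((ch = getopt(argc, argv, "vD:")) != -1)
        {
            switch(ch)
            {
            case 'v': std::printf("verbose\n"); break;
            case 'D': std::printf("dims = %s\n", optarg); break;
            default: return 1;
            }
        }

        // Positional arguments (init_method, time_kernel, ...) start at argv[optind].
        for(int i = optind; i < argc; ++i)
            std::printf("positional: %s\n", argv[i]);

        return 0;
    }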
script/parse_perf_data.py  (new file, 0 → 100644)

#!/usr/bin/env python3
import os, io
import argparse

def print_to_string(*args, **kwargs):
    output = io.StringIO()
    print(*args, file=output, **kwargs)
    contents = output.getvalue()
    output.close()
    return contents

def parse_args():
    parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
    parser.add_argument('filename', type=str,
                        help='Log file to prase or directory containing log files')
    args = parser.parse_args()
    files = []
    if os.path.isdir(args.filename):
        all_files = os.listdir(args.filename)
        for name in all_files:
            if not 'log' in name:
                continue
            files.append(os.path.join(args.filename, name))
    else:
        files = [args.filename]
    args.files = files
    return args

def main():
    args = parse_args()
    results = []
    #parse results
    glue = ""
    for filename in args.files:
        for line in open(filename):
            if 'Best Perf' in line:
                lst = line.split()
                results.append(print_to_string(glue.join(lst[8:]), lst[4]))
    #sort results
    #read baseline results for the latest develop branch
    #write new results to the db
    #compare the results to the baseline
    #return 0 if performance criteria met, otherwise return 1
    print(results)
    return 0

if __name__ == '__main__':
    main()
\ No newline at end of file