Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cab8f2e5
Commit
cab8f2e5
authored
Mar 14, 2022
by
Jing Zhang
Browse files
clean
parents
c20aabc3
9a17e7fb
Changes
86
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
222 additions
and
445 deletions
+222
-445
profiler/include/profile_reduce_impl.hpp
profiler/include/profile_reduce_impl.hpp
+83
-83
profiler/src/profile_reduce.cpp
profiler/src/profile_reduce.cpp
+13
-13
script/count_vgpr.sh
script/count_vgpr.sh
+17
-256
script/profile_reduce_no_index.sh
script/profile_reduce_no_index.sh
+53
-45
script/profile_reduce_with_index.sh
script/profile_reduce_with_index.sh
+50
-42
test/space_filling_curve/space_filling_curve.cpp
test/space_filling_curve/space_filling_curve.cpp
+6
-6
No files found.
profiler/include/profile_reduce_impl.hpp
View file @
cab8f2e5
...
@@ -9,54 +9,52 @@ namespace tensor_operation {
...
@@ -9,54 +9,52 @@ namespace tensor_operation {
namespace
device
{
namespace
device
{
namespace
device_reduce_instance
{
namespace
device_reduce_instance
{
template
<
int
Rank
,
typename
ReduceDim
s
,
int
ReduceOpId
,
int
NanOpt
,
int
IndicesOpt
>
template
<
int
Rank
,
int
Num
ReduceDim
,
int
ReduceOpId
,
int
NanOpt
,
int
IndicesOpt
>
struct
ReduceDescription
struct
ReduceDescription
{
{
static
constexpr
int
Rank_
=
Rank
;
static
constexpr
int
Rank_
=
Rank
;
static
constexpr
int
ReduceOpId_
=
ReduceOpId
;
static
constexpr
int
NumReduceDim_
=
NumReduceDim
;
static
constexpr
int
NanOpt_
=
NanOpt
;
static
constexpr
int
ReduceOpId_
=
ReduceOpId
;
static
constexpr
int
IndicesOpt_
=
IndicesOpt
;
static
constexpr
int
NanOpt_
=
NanOpt
;
static
constexpr
int
IndicesOpt_
=
IndicesOpt
;
using
ReduceDims_
=
ReduceDims
;
};
};
using
reduce_description_instances
=
using
reduce_description_instances
=
std
::
tuple
<
ReduceDescription
<
4
,
3
,
0
,
0
,
0
>
,
// for ADD
std
::
tuple
<
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
0
,
0
,
0
>
,
// for ADD
ReduceDescription
<
4
,
1
,
0
,
0
,
0
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
0
,
0
,
0
>
,
ReduceDescription
<
2
,
1
,
0
,
0
,
0
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
0
,
0
,
0
>
,
ReduceDescription
<
4
,
3
,
5
,
0
,
0
>
,
// for AVG
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
5
,
0
,
0
>
,
// for AVG
ReduceDescription
<
4
,
1
,
5
,
0
,
0
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
5
,
0
,
0
>
,
ReduceDescription
<
2
,
1
,
5
,
0
,
0
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
5
,
0
,
0
>
,
ReduceDescription
<
4
,
3
,
7
,
0
,
0
>
,
// for NORM2
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
7
,
0
,
0
>
,
// for NORM2
ReduceDescription
<
4
,
1
,
7
,
0
,
0
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
7
,
0
,
0
>
,
ReduceDescription
<
2
,
1
,
7
,
0
,
0
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
7
,
0
,
0
>
,
ReduceDescription
<
4
,
3
,
2
,
0
,
0
>
,
// for MIN
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
2
,
0
,
0
>
,
// for MIN
ReduceDescription
<
4
,
1
,
2
,
0
,
0
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
2
,
0
,
0
>
,
ReduceDescription
<
2
,
1
,
2
,
0
,
0
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
2
,
0
,
0
>
,
ReduceDescription
<
4
,
3
,
3
,
0
,
0
>
,
// for MAX
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
3
,
0
,
0
>
,
// for MAX
ReduceDescription
<
4
,
1
,
3
,
0
,
0
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
3
,
0
,
0
>
,
ReduceDescription
<
2
,
1
,
3
,
0
,
0
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
3
,
0
,
0
>
,
ReduceDescription
<
4
,
3
,
4
,
0
,
0
>
,
// for AMAX
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
4
,
0
,
0
>
,
// for AMAX
ReduceDescription
<
4
,
1
,
4
,
0
,
0
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
4
,
0
,
0
>
,
ReduceDescription
<
2
,
1
,
4
,
0
,
0
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
4
,
0
,
0
>
,
ReduceDescription
<
4
,
3
,
2
,
0
,
1
>
,
// for MIN
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
2
,
0
,
1
>
,
// for MIN
ReduceDescription
<
4
,
1
,
2
,
0
,
1
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
2
,
0
,
1
>
,
ReduceDescription
<
2
,
1
,
2
,
0
,
1
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
2
,
0
,
1
>
,
ReduceDescription
<
4
,
3
,
3
,
0
,
1
>
,
// for MAX
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
3
,
0
,
1
>
,
// for MAX
ReduceDescription
<
4
,
1
,
3
,
0
,
1
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
3
,
0
,
1
>
,
ReduceDescription
<
2
,
1
,
3
,
0
,
1
>
,
ReduceDescription
<
2
,
Sequence
<
1
>
,
3
,
0
,
1
>
,
ReduceDescription
<
4
,
3
,
4
,
0
,
1
>
,
// for AMAX
ReduceDescription
<
4
,
Sequence
<
0
,
1
,
2
>
,
4
,
0
,
1
>
,
// for AMAX
ReduceDescription
<
4
,
1
,
4
,
0
,
1
>
,
ReduceDescription
<
4
,
Sequence
<
0
>
,
4
,
0
,
1
>
,
ReduceDescription
<
2
,
1
,
4
,
0
,
1
>>
;
ReduceDescription
<
2
,
Sequence
<
1
>
,
4
,
0
,
1
>>
;
template
<
typename
DescriptionType
>
template
<
typename
DescriptionType
>
bool
description_match
(
const
DescriptionType
&
description
,
bool
description_match
(
const
DescriptionType
&
description
,
int
Rank
,
int
Rank
,
const
std
::
vector
<
int
>&
R
educeDims
,
const
std
::
vector
<
int
>&
r
educeDims
,
ReduceTensorOp_t
ReduceOpId
,
ReduceTensorOp_t
ReduceOpId
,
NanPropagation_t
NanOpt
,
NanPropagation_t
NanOpt
,
ReduceTensorIndices_t
IndicesOpt
)
ReduceTensorIndices_t
IndicesOpt
)
...
@@ -66,16 +64,11 @@ bool description_match(const DescriptionType& description,
...
@@ -66,16 +64,11 @@ bool description_match(const DescriptionType& description,
description
.
IndicesOpt_
!=
static_cast
<
int
>
(
IndicesOpt
))
description
.
IndicesOpt_
!=
static_cast
<
int
>
(
IndicesOpt
))
return
(
false
);
return
(
false
);
if
(
DescriptionType
::
ReduceDim
s_
::
Size
()
!=
R
educeDims
.
size
())
if
(
DescriptionType
::
Num
ReduceDim
_
!=
r
educeDims
.
size
())
return
(
false
);
return
(
false
);
bool
result
=
true
;
bool
result
=
true
;
static_for
<
0
,
DescriptionType
::
ReduceDims_
::
Size
(),
1
>
{}([
&
](
auto
i
)
{
if
(
DescriptionType
::
ReduceDims_
::
At
(
i
)
!=
ReduceDims
[
i
])
result
=
false
;
});
return
(
result
);
return
(
result
);
};
};
...
@@ -87,33 +80,29 @@ bool description_match(const DescriptionType& description,
...
@@ -87,33 +80,29 @@ bool description_match(const DescriptionType& description,
namespace
ck
{
namespace
ck
{
namespace
profiler
{
namespace
profiler
{
template
<
int
Rank
,
typename
ReduceDims
>
template
<
index_t
Rank
,
index_t
NumReduceDim
>
static
std
::
vector
<
int
>
get_reduce_dims
()
static
inline
std
::
vector
<
int
>
get_invariant_dims
(
const
std
::
vector
<
int
>&
reduceDims
)
{
std
::
vector
<
int
>
resDims
;
static_for
<
0
,
ReduceDims
::
Size
(),
1
>
{}([
&
](
auto
i
)
{
resDims
.
push_back
(
ReduceDims
::
At
(
i
));
});
return
(
resDims
);
};
template
<
int
Rank
,
typename
ReduceDims
>
static
std
::
vector
<
int
>
get_invariant_dims
()
{
{
std
::
vector
<
int
>
resDims
;
assert
(
NumReduceDim
==
reduceDims
.
size
());
unsigned
int
incFlag
=
0
;
static_for
<
0
,
ReduceDims
::
Size
(),
1
>
{}(
int
reduceFlag
=
0
;
[
&
](
auto
i
)
{
incFlag
=
incFlag
|
(
0x1
<<
ReduceDims
::
At
(
i
));
});
for
(
int
dim
=
0
;
dim
<
Rank
;
dim
++
)
// flag the bits for the reduceDims
for
(
int
i
=
0
;
i
<
NumReduceDim
;
i
++
)
{
{
if
(
incFlag
&
(
0x1
<<
dim
))
reduceFlag
|=
1
<<
reduceDims
[
i
];
continue
;
resDims
.
push_back
(
dim
);
};
};
return
(
resDims
);
std
::
vector
<
int
>
invariantDims
;
// collect invariant dimensions
for
(
int
i
=
0
;
i
<
Rank
;
i
++
)
if
((
reduceFlag
&
(
1
<<
i
))
==
0
)
{
invariantDims
.
push_back
(
i
);
};
return
invariantDims
;
};
};
template
<
typename
T
>
template
<
typename
T
>
...
@@ -149,7 +138,7 @@ template <typename InDataType,
...
@@ -149,7 +138,7 @@ template <typename InDataType,
typename
AccDataType
,
typename
AccDataType
,
typename
OutDataType
,
typename
OutDataType
,
int
Rank
,
int
Rank
,
typename
ReduceDim
s_
,
int
Num
ReduceDim
,
ReduceTensorOp_t
ReduceOpId
,
ReduceTensorOp_t
ReduceOpId
,
NanPropagation_t
NanOpt
,
NanPropagation_t
NanOpt
,
ReduceTensorIndices_t
IndicesOpt
>
ReduceTensorIndices_t
IndicesOpt
>
...
@@ -159,6 +148,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -159,6 +148,7 @@ void profile_reduce_impl_impl(bool do_verification,
bool
do_dumpout
,
bool
do_dumpout
,
int
nrepeat
,
int
nrepeat
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
int
>&
reduceDims
,
float
alpha
,
float
alpha
,
float
beta
)
float
beta
)
{
{
...
@@ -203,15 +193,14 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -203,15 +193,14 @@ void profile_reduce_impl_impl(bool do_verification,
{
{
Tensor
<
InDataType
>
in
(
inLengths
);
Tensor
<
InDataType
>
in
(
inLengths
);
const
std
::
vector
<
int
>
OuterDims
=
get_invariant_dims
<
Rank
,
ReduceDims_
>
();
const
std
::
vector
<
int
>
ReduceDims
=
get_reduce_dims
<
Rank
,
ReduceDims_
>
();
std
::
vector
<
size_t
>
outLengths
;
std
::
vector
<
size_t
>
outLengths
;
if
(
OuterDims
.
empty
())
const
auto
invariantDims
=
get_invariant_dims
<
Rank
,
NumReduceDim
>
(
reduceDims
);
if
(
reduceDims
.
size
()
==
Rank
)
outLengths
.
push_back
(
1
);
outLengths
.
push_back
(
1
);
else
else
for
(
auto
dim
:
Outer
Dims
)
for
(
auto
dim
:
invariant
Dims
)
outLengths
.
push_back
(
inLengths
[
dim
]);
outLengths
.
push_back
(
inLengths
[
dim
]);
Tensor
<
OutDataType
>
out_ref
(
outLengths
);
Tensor
<
OutDataType
>
out_ref
(
outLengths
);
...
@@ -302,7 +291,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -302,7 +291,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType
,
AccDataType
,
OutDataType
,
OutDataType
,
Rank
,
Rank
,
ReduceDim
s_
,
Num
ReduceDim
,
ReduceOpId
,
ReduceOpId
,
NanOpt
,
NanOpt
,
IndicesOpt
>
(
reduce0_ptrs
);
IndicesOpt
>
(
reduce0_ptrs
);
...
@@ -311,7 +300,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -311,7 +300,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType
,
AccDataType
,
OutDataType
,
OutDataType
,
Rank
,
Rank
,
ReduceDim
s_
,
Num
ReduceDim
,
ReduceOpId
,
ReduceOpId
,
NanOpt
,
NanOpt
,
IndicesOpt
>
(
reduce0_ptrs
);
IndicesOpt
>
(
reduce0_ptrs
);
...
@@ -321,7 +310,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -321,7 +310,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType
,
AccDataType
,
OutDataType
,
OutDataType
,
Rank
,
Rank
,
ReduceDim
s_
,
Num
ReduceDim
,
ReduceOpId
,
ReduceOpId
,
NanOpt
,
NanOpt
,
IndicesOpt
>
(
reduce0_ptrs
);
IndicesOpt
>
(
reduce0_ptrs
);
...
@@ -330,7 +319,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -330,7 +319,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType
,
AccDataType
,
OutDataType
,
OutDataType
,
Rank
,
Rank
,
ReduceDim
s_
,
Num
ReduceDim
,
ReduceOpId
,
ReduceOpId
,
NanOpt
,
NanOpt
,
IndicesOpt
>
(
reduce1_ptrs
);
IndicesOpt
>
(
reduce1_ptrs
);
...
@@ -341,7 +330,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -341,7 +330,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType
,
AccDataType
,
OutDataType
,
OutDataType
,
Rank
,
Rank
,
ReduceDim
s_
,
Num
ReduceDim
,
ReduceOpId
,
ReduceOpId
,
NanOpt
,
NanOpt
,
IndicesOpt
>
(
reduce2_ptrs
);
IndicesOpt
>
(
reduce2_ptrs
);
...
@@ -358,7 +347,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -358,7 +347,7 @@ void profile_reduce_impl_impl(bool do_verification,
using
hCompType
=
typename
type_mapping
<
AccDataType
>::
outDataType
;
using
hCompType
=
typename
type_mapping
<
AccDataType
>::
outDataType
;
ReductionHost
<
hInType
,
hCompType
,
hOutType
,
ReduceOpId
,
PropagateNan
,
NeedIndices
>
ReductionHost
<
hInType
,
hCompType
,
hOutType
,
ReduceOpId
,
PropagateNan
,
NeedIndices
>
hostReduce
(
in
.
mDesc
,
out_ref
.
mDesc
,
Outer
Dims
,
R
educeDims
);
hostReduce
(
in
.
mDesc
,
out_ref
.
mDesc
,
invariant
Dims
,
r
educeDims
);
hostReduce
.
Run
(
alpha
,
hostReduce
.
Run
(
alpha
,
reinterpret_cast
<
const
hInType
*>
(
in
.
mData
.
data
()),
reinterpret_cast
<
const
hInType
*>
(
in
.
mData
.
data
()),
...
@@ -383,6 +372,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -383,6 +372,7 @@ void profile_reduce_impl_impl(bool do_verification,
i_inStrides
,
i_inStrides
,
i_outLengths
,
i_outLengths
,
i_outStrides
,
i_outStrides
,
reduceDims
,
alpha
,
alpha
,
beta
,
beta
,
in_dev
.
GetDeviceBuffer
(),
in_dev
.
GetDeviceBuffer
(),
...
@@ -464,6 +454,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -464,6 +454,7 @@ void profile_reduce_impl_impl(bool do_verification,
i_inStrides
,
i_inStrides
,
i_outLengths
,
i_outLengths
,
i_outStrides
,
i_outStrides
,
reduceDims
,
alpha
,
alpha
,
beta
,
beta
,
in_dev
.
GetDeviceBuffer
(),
in_dev
.
GetDeviceBuffer
(),
...
@@ -496,6 +487,7 @@ void profile_reduce_impl_impl(bool do_verification,
...
@@ -496,6 +487,7 @@ void profile_reduce_impl_impl(bool do_verification,
inStrides2
,
inStrides2
,
i_outLengths
,
i_outLengths
,
i_outStrides
,
i_outStrides
,
reduceDims
,
alpha
,
alpha
,
beta
,
beta
,
ws_dev
.
GetDeviceBuffer
(),
ws_dev
.
GetDeviceBuffer
(),
...
@@ -584,7 +576,7 @@ void profile_reduce_impl(bool do_verification,
...
@@ -584,7 +576,7 @@ void profile_reduce_impl(bool do_verification,
bool
do_dumpout
,
bool
do_dumpout
,
int
nrepeat
,
int
nrepeat
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
size_t
>&
inLengths
,
const
std
::
vector
<
int
>&
R
educeDims
,
const
std
::
vector
<
int
>&
r
educeDims
,
ReduceTensorOp_t
ReduceOpId
,
ReduceTensorOp_t
ReduceOpId
,
NanPropagation_t
NanOpt
,
NanPropagation_t
NanOpt
,
ReduceTensorIndices_t
IndicesOpt
,
ReduceTensorIndices_t
IndicesOpt
,
...
@@ -605,18 +597,26 @@ void profile_reduce_impl(bool do_verification,
...
@@ -605,18 +597,26 @@ void profile_reduce_impl(bool do_verification,
using
descType
=
remove_cvref_t
<
decltype
(
std
::
get
<
i
>
(
tuple_object
))
>
;
using
descType
=
remove_cvref_t
<
decltype
(
std
::
get
<
i
>
(
tuple_object
))
>
;
if
(
!
description_match
(
if
(
!
description_match
(
descType
{},
inLengths
.
size
(),
R
educeDims
,
ReduceOpId
,
NanOpt
,
IndicesOpt
))
descType
{},
inLengths
.
size
(),
r
educeDims
,
ReduceOpId
,
NanOpt
,
IndicesOpt
))
return
;
return
;
profile_reduce_impl_impl
<
InDataType
,
profile_reduce_impl_impl
<
InDataType
,
AccDataType
,
AccDataType
,
OutDataType
,
OutDataType
,
descType
::
Rank_
,
descType
::
Rank_
,
typename
descType
::
ReduceDim
s
_
,
descType
::
Num
ReduceDim_
,
static_cast
<
ReduceTensorOp_t
>
(
descType
::
ReduceOpId_
),
static_cast
<
ReduceTensorOp_t
>
(
descType
::
ReduceOpId_
),
static_cast
<
NanPropagation_t
>
(
descType
::
NanOpt_
),
static_cast
<
NanPropagation_t
>
(
descType
::
NanOpt_
),
static_cast
<
ReduceTensorIndices_t
>
(
descType
::
IndicesOpt_
)
>
(
static_cast
<
ReduceTensorIndices_t
>
(
descType
::
IndicesOpt_
)
>
(
do_verification
,
init_method
,
do_log
,
do_dumpout
,
nrepeat
,
inLengths
,
alpha
,
beta
);
do_verification
,
init_method
,
do_log
,
do_dumpout
,
nrepeat
,
inLengths
,
reduceDims
,
alpha
,
beta
);
matched
=
true
;
matched
=
true
;
});
});
...
...
profiler/src/profile_reduce.cpp
View file @
cab8f2e5
...
@@ -25,7 +25,7 @@ using ck::ReduceTensorIndices_t;
...
@@ -25,7 +25,7 @@ using ck::ReduceTensorIndices_t;
using
ck
::
ReduceTensorOp_t
;
using
ck
::
ReduceTensorOp_t
;
static
struct
option
long_options
[]
=
{{
"inLengths"
,
required_argument
,
nullptr
,
'D'
},
static
struct
option
long_options
[]
=
{{
"inLengths"
,
required_argument
,
nullptr
,
'D'
},
{
"
toR
educeDims"
,
required_argument
,
nullptr
,
'R'
},
{
"
r
educeDims"
,
required_argument
,
nullptr
,
'R'
},
{
"reduceOp"
,
required_argument
,
nullptr
,
'O'
},
{
"reduceOp"
,
required_argument
,
nullptr
,
'O'
},
{
"compType"
,
required_argument
,
nullptr
,
'C'
},
{
"compType"
,
required_argument
,
nullptr
,
'C'
},
{
"outType"
,
required_argument
,
nullptr
,
'W'
},
{
"outType"
,
required_argument
,
nullptr
,
'W'
},
...
@@ -93,9 +93,9 @@ typedef enum
...
@@ -93,9 +93,9 @@ typedef enum
appDouble
=
6
,
appDouble
=
6
,
}
appDataType_t
;
}
appDataType_t
;
static
void
check_reduce_dims
(
const
int
rank
,
const
std
::
vector
<
int
>&
toR
educeDims
)
static
void
check_reduce_dims
(
const
int
rank
,
const
std
::
vector
<
int
>&
r
educeDims
)
{
{
for
(
auto
dim
:
toR
educeDims
)
for
(
auto
dim
:
r
educeDims
)
{
{
if
(
dim
<
0
||
dim
>=
rank
)
if
(
dim
<
0
||
dim
>=
rank
)
throw
std
::
runtime_error
(
"Invalid dimension index specified for Reducing"
);
throw
std
::
runtime_error
(
"Invalid dimension index specified for Reducing"
);
...
@@ -103,7 +103,7 @@ static void check_reduce_dims(const int rank, const std::vector<int>& toReduceDi
...
@@ -103,7 +103,7 @@ static void check_reduce_dims(const int rank, const std::vector<int>& toReduceDi
unsigned
int
flag
=
0
;
unsigned
int
flag
=
0
;
for
(
auto
dim
:
toR
educeDims
)
for
(
auto
dim
:
r
educeDims
)
{
{
if
(
flag
&
(
0x1
<<
dim
))
if
(
flag
&
(
0x1
<<
dim
))
throw
std
::
runtime_error
(
"All toReduce dimensions should be different!"
);
throw
std
::
runtime_error
(
"All toReduce dimensions should be different!"
);
...
@@ -122,7 +122,7 @@ class AppArgs
...
@@ -122,7 +122,7 @@ class AppArgs
std
::
vector
<
size_t
>
inLengths
;
std
::
vector
<
size_t
>
inLengths
;
std
::
vector
<
size_t
>
outLengths
;
std
::
vector
<
size_t
>
outLengths
;
std
::
vector
<
int
>
toR
educeDims
;
std
::
vector
<
int
>
r
educeDims
;
std
::
vector
<
float
>
scales
;
std
::
vector
<
float
>
scales
;
...
@@ -152,7 +152,7 @@ class AppArgs
...
@@ -152,7 +152,7 @@ class AppArgs
std
::
cout
<<
"Usage of "
<<
cmd
<<
std
::
endl
;
std
::
cout
<<
"Usage of "
<<
cmd
<<
std
::
endl
;
std
::
cout
<<
"--inLengths or -D, comma separated list of input tensor dimension lengths"
std
::
cout
<<
"--inLengths or -D, comma separated list of input tensor dimension lengths"
<<
std
::
endl
;
<<
std
::
endl
;
std
::
cout
<<
"--
toR
educeDims or -R, comma separated list of to-reduce dimensions"
std
::
cout
<<
"--
r
educeDims or -R, comma separated list of to-reduce dimensions"
<<
std
::
endl
;
<<
std
::
endl
;
std
::
cout
<<
"--reduceOp or -O, enum value indicating the reduction operations"
std
::
cout
<<
"--reduceOp or -O, enum value indicating the reduction operations"
<<
std
::
endl
;
<<
std
::
endl
;
...
@@ -201,7 +201,7 @@ class AppArgs
...
@@ -201,7 +201,7 @@ class AppArgs
if
(
!
optarg
)
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
throw
std
::
runtime_error
(
"Invalid option format!"
);
toR
educeDims
=
getTypeValuesFromString
<
int
>
(
optarg
);
r
educeDims
=
getTypeValuesFromString
<
int
>
(
optarg
);
break
;
break
;
case
'O'
:
case
'O'
:
if
(
!
optarg
)
if
(
!
optarg
)
...
@@ -321,7 +321,7 @@ int profile_reduce(int argc, char* argv[])
...
@@ -321,7 +321,7 @@ int profile_reduce(int argc, char* argv[])
int
rank
=
args
.
inLengths
.
size
();
int
rank
=
args
.
inLengths
.
size
();
check_reduce_dims
(
rank
,
args
.
toR
educeDims
);
check_reduce_dims
(
rank
,
args
.
r
educeDims
);
if
(
args
.
reduceOp
==
ReduceTensorOp_t
::
MUL
||
args
.
reduceOp
==
ReduceTensorOp_t
::
NORM1
)
if
(
args
.
reduceOp
==
ReduceTensorOp_t
::
MUL
||
args
.
reduceOp
==
ReduceTensorOp_t
::
NORM1
)
throw
std
::
runtime_error
(
"MUL and NORM1 are not supported by composable kernel!"
);
throw
std
::
runtime_error
(
"MUL and NORM1 are not supported by composable kernel!"
);
...
@@ -345,7 +345,7 @@ int profile_reduce(int argc, char* argv[])
...
@@ -345,7 +345,7 @@ int profile_reduce(int argc, char* argv[])
args
.
do_dumpout
,
args
.
do_dumpout
,
args
.
nrepeat
,
args
.
nrepeat
,
args
.
inLengths
,
args
.
inLengths
,
args
.
toR
educeDims
,
args
.
r
educeDims
,
args
.
reduceOp
,
args
.
reduceOp
,
args
.
nanOpt
,
args
.
nanOpt
,
args
.
indicesOpt
,
args
.
indicesOpt
,
...
@@ -360,7 +360,7 @@ int profile_reduce(int argc, char* argv[])
...
@@ -360,7 +360,7 @@ int profile_reduce(int argc, char* argv[])
args
.
do_dumpout
,
args
.
do_dumpout
,
args
.
nrepeat
,
args
.
nrepeat
,
args
.
inLengths
,
args
.
inLengths
,
args
.
toR
educeDims
,
args
.
r
educeDims
,
args
.
reduceOp
,
args
.
reduceOp
,
args
.
nanOpt
,
args
.
nanOpt
,
args
.
indicesOpt
,
args
.
indicesOpt
,
...
@@ -378,7 +378,7 @@ int profile_reduce(int argc, char* argv[])
...
@@ -378,7 +378,7 @@ int profile_reduce(int argc, char* argv[])
args
.
do_dumpout
,
args
.
do_dumpout
,
args
.
nrepeat
,
args
.
nrepeat
,
args
.
inLengths
,
args
.
inLengths
,
args
.
toR
educeDims
,
args
.
r
educeDims
,
args
.
reduceOp
,
args
.
reduceOp
,
args
.
nanOpt
,
args
.
nanOpt
,
args
.
indicesOpt
,
args
.
indicesOpt
,
...
@@ -395,7 +395,7 @@ int profile_reduce(int argc, char* argv[])
...
@@ -395,7 +395,7 @@ int profile_reduce(int argc, char* argv[])
args
.
do_dumpout
,
args
.
do_dumpout
,
args
.
nrepeat
,
args
.
nrepeat
,
args
.
inLengths
,
args
.
inLengths
,
args
.
toR
educeDims
,
args
.
r
educeDims
,
args
.
reduceOp
,
args
.
reduceOp
,
args
.
nanOpt
,
args
.
nanOpt
,
args
.
indicesOpt
,
args
.
indicesOpt
,
...
@@ -410,7 +410,7 @@ int profile_reduce(int argc, char* argv[])
...
@@ -410,7 +410,7 @@ int profile_reduce(int argc, char* argv[])
args
.
do_dumpout
,
args
.
do_dumpout
,
args
.
nrepeat
,
args
.
nrepeat
,
args
.
inLengths
,
args
.
inLengths
,
args
.
toR
educeDims
,
args
.
r
educeDims
,
args
.
reduceOp
,
args
.
reduceOp
,
args
.
nanOpt
,
args
.
nanOpt
,
args
.
indicesOpt
,
args
.
indicesOpt
,
...
...
script/count_vgpr.sh
View file @
cab8f2e5
#!/bin/bash
#!/bin/bash
FILE
=
$1
FILE
=
$1
echo
v0
$(
grep
-w
v0
$FILE
|
wc
-l
)
for
num
in
{
0..255
}
echo
v1
$(
grep
-w
v1
$FILE
|
wc
-l
)
do
echo
v2
$(
grep
-w
v2
$FILE
|
wc
-l
)
base_pattern
=
"(
\[
?
${
num
}
\b
|
\[\d
*:
${
num
}
\]
)"
echo
v3
$(
grep
-w
v3
$FILE
|
wc
-l
)
spattern
=
"s
${
base_pattern
}
"
echo
v4
$(
grep
-w
v4
$FILE
|
wc
-l
)
vpattern
=
"v
${
base_pattern
}
"
echo
v5
$(
grep
-w
v5
$FILE
|
wc
-l
)
apattern
=
"a
${
base_pattern
}
"
echo
v6
$(
grep
-w
v6
$FILE
|
wc
-l
)
scount
=
$(
grep
-P
$spattern
$FILE
|
wc
-l
)
echo
v7
$(
grep
-w
v7
$FILE
|
wc
-l
)
vcount
=
$(
grep
-P
$vpattern
$FILE
|
wc
-l
)
echo
v8
$(
grep
-w
v8
$FILE
|
wc
-l
)
acount
=
$(
grep
-P
$apattern
$FILE
|
wc
-l
)
echo
v9
$(
grep
-w
v9
$FILE
|
wc
-l
)
bash
-c
"echo -n v
${
num
}
$vcount
&&
\
echo
v10
$(
grep
-w
v10
$FILE
|
wc
-l
)
echo -n , s
${
num
}
$scount
&&
\
echo
v11
$(
grep
-w
v11
$FILE
|
wc
-l
)
echo -n , a
${
num
}
$acount
"
echo
v12
$(
grep
-w
v12
$FILE
|
wc
-l
)
if
[[
$scount
-ne
0
||
$vcount
-ne
0
||
$acount
-ne
0
]]
;
then
echo
v13
$(
grep
-w
v13
$FILE
|
wc
-l
)
echo
-n
" *"
echo
v14
$(
grep
-w
v14
$FILE
|
wc
-l
)
fi
echo
v15
$(
grep
-w
v15
$FILE
|
wc
-l
)
echo
""
echo
v16
$(
grep
-w
v16
$FILE
|
wc
-l
)
done
echo
v17
$(
grep
-w
v17
$FILE
|
wc
-l
)
echo
v18
$(
grep
-w
v18
$FILE
|
wc
-l
)
echo
v19
$(
grep
-w
v19
$FILE
|
wc
-l
)
echo
v20
$(
grep
-w
v20
$FILE
|
wc
-l
)
echo
v21
$(
grep
-w
v21
$FILE
|
wc
-l
)
echo
v22
$(
grep
-w
v22
$FILE
|
wc
-l
)
echo
v23
$(
grep
-w
v23
$FILE
|
wc
-l
)
echo
v24
$(
grep
-w
v24
$FILE
|
wc
-l
)
echo
v25
$(
grep
-w
v25
$FILE
|
wc
-l
)
echo
v26
$(
grep
-w
v26
$FILE
|
wc
-l
)
echo
v27
$(
grep
-w
v27
$FILE
|
wc
-l
)
echo
v28
$(
grep
-w
v28
$FILE
|
wc
-l
)
echo
v29
$(
grep
-w
v29
$FILE
|
wc
-l
)
echo
v30
$(
grep
-w
v30
$FILE
|
wc
-l
)
echo
v31
$(
grep
-w
v31
$FILE
|
wc
-l
)
echo
v32
$(
grep
-w
v32
$FILE
|
wc
-l
)
echo
v33
$(
grep
-w
v33
$FILE
|
wc
-l
)
echo
v34
$(
grep
-w
v34
$FILE
|
wc
-l
)
echo
v35
$(
grep
-w
v35
$FILE
|
wc
-l
)
echo
v36
$(
grep
-w
v36
$FILE
|
wc
-l
)
echo
v37
$(
grep
-w
v37
$FILE
|
wc
-l
)
echo
v38
$(
grep
-w
v38
$FILE
|
wc
-l
)
echo
v39
$(
grep
-w
v39
$FILE
|
wc
-l
)
echo
v40
$(
grep
-w
v40
$FILE
|
wc
-l
)
echo
v41
$(
grep
-w
v41
$FILE
|
wc
-l
)
echo
v42
$(
grep
-w
v42
$FILE
|
wc
-l
)
echo
v43
$(
grep
-w
v43
$FILE
|
wc
-l
)
echo
v44
$(
grep
-w
v44
$FILE
|
wc
-l
)
echo
v45
$(
grep
-w
v45
$FILE
|
wc
-l
)
echo
v46
$(
grep
-w
v46
$FILE
|
wc
-l
)
echo
v47
$(
grep
-w
v47
$FILE
|
wc
-l
)
echo
v48
$(
grep
-w
v48
$FILE
|
wc
-l
)
echo
v49
$(
grep
-w
v49
$FILE
|
wc
-l
)
echo
v50
$(
grep
-w
v50
$FILE
|
wc
-l
)
echo
v51
$(
grep
-w
v51
$FILE
|
wc
-l
)
echo
v52
$(
grep
-w
v52
$FILE
|
wc
-l
)
echo
v53
$(
grep
-w
v53
$FILE
|
wc
-l
)
echo
v54
$(
grep
-w
v54
$FILE
|
wc
-l
)
echo
v55
$(
grep
-w
v55
$FILE
|
wc
-l
)
echo
v56
$(
grep
-w
v56
$FILE
|
wc
-l
)
echo
v57
$(
grep
-w
v57
$FILE
|
wc
-l
)
echo
v58
$(
grep
-w
v58
$FILE
|
wc
-l
)
echo
v59
$(
grep
-w
v59
$FILE
|
wc
-l
)
echo
v60
$(
grep
-w
v60
$FILE
|
wc
-l
)
echo
v61
$(
grep
-w
v61
$FILE
|
wc
-l
)
echo
v62
$(
grep
-w
v62
$FILE
|
wc
-l
)
echo
v63
$(
grep
-w
v63
$FILE
|
wc
-l
)
echo
v64
$(
grep
-w
v64
$FILE
|
wc
-l
)
echo
v65
$(
grep
-w
v65
$FILE
|
wc
-l
)
echo
v66
$(
grep
-w
v66
$FILE
|
wc
-l
)
echo
v67
$(
grep
-w
v67
$FILE
|
wc
-l
)
echo
v68
$(
grep
-w
v68
$FILE
|
wc
-l
)
echo
v69
$(
grep
-w
v69
$FILE
|
wc
-l
)
echo
v70
$(
grep
-w
v70
$FILE
|
wc
-l
)
echo
v71
$(
grep
-w
v71
$FILE
|
wc
-l
)
echo
v72
$(
grep
-w
v72
$FILE
|
wc
-l
)
echo
v73
$(
grep
-w
v73
$FILE
|
wc
-l
)
echo
v74
$(
grep
-w
v74
$FILE
|
wc
-l
)
echo
v75
$(
grep
-w
v75
$FILE
|
wc
-l
)
echo
v76
$(
grep
-w
v76
$FILE
|
wc
-l
)
echo
v77
$(
grep
-w
v77
$FILE
|
wc
-l
)
echo
v78
$(
grep
-w
v78
$FILE
|
wc
-l
)
echo
v79
$(
grep
-w
v79
$FILE
|
wc
-l
)
echo
v80
$(
grep
-w
v80
$FILE
|
wc
-l
)
echo
v81
$(
grep
-w
v81
$FILE
|
wc
-l
)
echo
v82
$(
grep
-w
v82
$FILE
|
wc
-l
)
echo
v83
$(
grep
-w
v83
$FILE
|
wc
-l
)
echo
v84
$(
grep
-w
v84
$FILE
|
wc
-l
)
echo
v85
$(
grep
-w
v85
$FILE
|
wc
-l
)
echo
v86
$(
grep
-w
v86
$FILE
|
wc
-l
)
echo
v87
$(
grep
-w
v87
$FILE
|
wc
-l
)
echo
v88
$(
grep
-w
v88
$FILE
|
wc
-l
)
echo
v89
$(
grep
-w
v89
$FILE
|
wc
-l
)
echo
v90
$(
grep
-w
v90
$FILE
|
wc
-l
)
echo
v91
$(
grep
-w
v91
$FILE
|
wc
-l
)
echo
v92
$(
grep
-w
v92
$FILE
|
wc
-l
)
echo
v93
$(
grep
-w
v93
$FILE
|
wc
-l
)
echo
v94
$(
grep
-w
v94
$FILE
|
wc
-l
)
echo
v95
$(
grep
-w
v95
$FILE
|
wc
-l
)
echo
v96
$(
grep
-w
v96
$FILE
|
wc
-l
)
echo
v97
$(
grep
-w
v97
$FILE
|
wc
-l
)
echo
v98
$(
grep
-w
v98
$FILE
|
wc
-l
)
echo
v99
$(
grep
-w
v99
$FILE
|
wc
-l
)
echo
v100
$(
grep
-w
v100
$FILE
|
wc
-l
)
echo
v101
$(
grep
-w
v101
$FILE
|
wc
-l
)
echo
v102
$(
grep
-w
v102
$FILE
|
wc
-l
)
echo
v103
$(
grep
-w
v103
$FILE
|
wc
-l
)
echo
v104
$(
grep
-w
v104
$FILE
|
wc
-l
)
echo
v105
$(
grep
-w
v105
$FILE
|
wc
-l
)
echo
v106
$(
grep
-w
v106
$FILE
|
wc
-l
)
echo
v107
$(
grep
-w
v107
$FILE
|
wc
-l
)
echo
v108
$(
grep
-w
v108
$FILE
|
wc
-l
)
echo
v109
$(
grep
-w
v109
$FILE
|
wc
-l
)
echo
v110
$(
grep
-w
v110
$FILE
|
wc
-l
)
echo
v111
$(
grep
-w
v111
$FILE
|
wc
-l
)
echo
v112
$(
grep
-w
v112
$FILE
|
wc
-l
)
echo
v113
$(
grep
-w
v113
$FILE
|
wc
-l
)
echo
v114
$(
grep
-w
v114
$FILE
|
wc
-l
)
echo
v115
$(
grep
-w
v115
$FILE
|
wc
-l
)
echo
v116
$(
grep
-w
v116
$FILE
|
wc
-l
)
echo
v117
$(
grep
-w
v117
$FILE
|
wc
-l
)
echo
v118
$(
grep
-w
v118
$FILE
|
wc
-l
)
echo
v119
$(
grep
-w
v119
$FILE
|
wc
-l
)
echo
v120
$(
grep
-w
v120
$FILE
|
wc
-l
)
echo
v121
$(
grep
-w
v121
$FILE
|
wc
-l
)
echo
v122
$(
grep
-w
v122
$FILE
|
wc
-l
)
echo
v123
$(
grep
-w
v123
$FILE
|
wc
-l
)
echo
v124
$(
grep
-w
v124
$FILE
|
wc
-l
)
echo
v125
$(
grep
-w
v125
$FILE
|
wc
-l
)
echo
v126
$(
grep
-w
v126
$FILE
|
wc
-l
)
echo
v127
$(
grep
-w
v127
$FILE
|
wc
-l
)
echo
v128
$(
grep
-w
v128
$FILE
|
wc
-l
)
echo
v129
$(
grep
-w
v129
$FILE
|
wc
-l
)
echo
v130
$(
grep
-w
v130
$FILE
|
wc
-l
)
echo
v131
$(
grep
-w
v131
$FILE
|
wc
-l
)
echo
v132
$(
grep
-w
v132
$FILE
|
wc
-l
)
echo
v133
$(
grep
-w
v133
$FILE
|
wc
-l
)
echo
v134
$(
grep
-w
v134
$FILE
|
wc
-l
)
echo
v135
$(
grep
-w
v135
$FILE
|
wc
-l
)
echo
v136
$(
grep
-w
v136
$FILE
|
wc
-l
)
echo
v137
$(
grep
-w
v137
$FILE
|
wc
-l
)
echo
v138
$(
grep
-w
v138
$FILE
|
wc
-l
)
echo
v139
$(
grep
-w
v139
$FILE
|
wc
-l
)
echo
v140
$(
grep
-w
v140
$FILE
|
wc
-l
)
echo
v141
$(
grep
-w
v141
$FILE
|
wc
-l
)
echo
v142
$(
grep
-w
v142
$FILE
|
wc
-l
)
echo
v143
$(
grep
-w
v143
$FILE
|
wc
-l
)
echo
v144
$(
grep
-w
v144
$FILE
|
wc
-l
)
echo
v145
$(
grep
-w
v145
$FILE
|
wc
-l
)
echo
v146
$(
grep
-w
v146
$FILE
|
wc
-l
)
echo
v147
$(
grep
-w
v147
$FILE
|
wc
-l
)
echo
v148
$(
grep
-w
v148
$FILE
|
wc
-l
)
echo
v149
$(
grep
-w
v149
$FILE
|
wc
-l
)
echo
v150
$(
grep
-w
v150
$FILE
|
wc
-l
)
echo
v151
$(
grep
-w
v151
$FILE
|
wc
-l
)
echo
v152
$(
grep
-w
v152
$FILE
|
wc
-l
)
echo
v153
$(
grep
-w
v153
$FILE
|
wc
-l
)
echo
v154
$(
grep
-w
v154
$FILE
|
wc
-l
)
echo
v155
$(
grep
-w
v155
$FILE
|
wc
-l
)
echo
v156
$(
grep
-w
v156
$FILE
|
wc
-l
)
echo
v157
$(
grep
-w
v157
$FILE
|
wc
-l
)
echo
v158
$(
grep
-w
v158
$FILE
|
wc
-l
)
echo
v159
$(
grep
-w
v159
$FILE
|
wc
-l
)
echo
v160
$(
grep
-w
v160
$FILE
|
wc
-l
)
echo
v161
$(
grep
-w
v161
$FILE
|
wc
-l
)
echo
v162
$(
grep
-w
v162
$FILE
|
wc
-l
)
echo
v163
$(
grep
-w
v163
$FILE
|
wc
-l
)
echo
v164
$(
grep
-w
v164
$FILE
|
wc
-l
)
echo
v165
$(
grep
-w
v165
$FILE
|
wc
-l
)
echo
v166
$(
grep
-w
v166
$FILE
|
wc
-l
)
echo
v167
$(
grep
-w
v167
$FILE
|
wc
-l
)
echo
v168
$(
grep
-w
v168
$FILE
|
wc
-l
)
echo
v169
$(
grep
-w
v169
$FILE
|
wc
-l
)
echo
v170
$(
grep
-w
v170
$FILE
|
wc
-l
)
echo
v171
$(
grep
-w
v171
$FILE
|
wc
-l
)
echo
v172
$(
grep
-w
v172
$FILE
|
wc
-l
)
echo
v173
$(
grep
-w
v173
$FILE
|
wc
-l
)
echo
v174
$(
grep
-w
v174
$FILE
|
wc
-l
)
echo
v175
$(
grep
-w
v175
$FILE
|
wc
-l
)
echo
v176
$(
grep
-w
v176
$FILE
|
wc
-l
)
echo
v177
$(
grep
-w
v177
$FILE
|
wc
-l
)
echo
v178
$(
grep
-w
v178
$FILE
|
wc
-l
)
echo
v179
$(
grep
-w
v179
$FILE
|
wc
-l
)
echo
v180
$(
grep
-w
v180
$FILE
|
wc
-l
)
echo
v181
$(
grep
-w
v181
$FILE
|
wc
-l
)
echo
v182
$(
grep
-w
v182
$FILE
|
wc
-l
)
echo
v183
$(
grep
-w
v183
$FILE
|
wc
-l
)
echo
v184
$(
grep
-w
v184
$FILE
|
wc
-l
)
echo
v185
$(
grep
-w
v185
$FILE
|
wc
-l
)
echo
v186
$(
grep
-w
v186
$FILE
|
wc
-l
)
echo
v187
$(
grep
-w
v187
$FILE
|
wc
-l
)
echo
v188
$(
grep
-w
v188
$FILE
|
wc
-l
)
echo
v189
$(
grep
-w
v189
$FILE
|
wc
-l
)
echo
v190
$(
grep
-w
v190
$FILE
|
wc
-l
)
echo
v191
$(
grep
-w
v191
$FILE
|
wc
-l
)
echo
v192
$(
grep
-w
v192
$FILE
|
wc
-l
)
echo
v193
$(
grep
-w
v193
$FILE
|
wc
-l
)
echo
v194
$(
grep
-w
v194
$FILE
|
wc
-l
)
echo
v195
$(
grep
-w
v195
$FILE
|
wc
-l
)
echo
v196
$(
grep
-w
v196
$FILE
|
wc
-l
)
echo
v197
$(
grep
-w
v197
$FILE
|
wc
-l
)
echo
v198
$(
grep
-w
v198
$FILE
|
wc
-l
)
echo
v199
$(
grep
-w
v199
$FILE
|
wc
-l
)
echo
v200
$(
grep
-w
v200
$FILE
|
wc
-l
)
echo
v201
$(
grep
-w
v201
$FILE
|
wc
-l
)
echo
v202
$(
grep
-w
v202
$FILE
|
wc
-l
)
echo
v203
$(
grep
-w
v203
$FILE
|
wc
-l
)
echo
v204
$(
grep
-w
v204
$FILE
|
wc
-l
)
echo
v205
$(
grep
-w
v205
$FILE
|
wc
-l
)
echo
v206
$(
grep
-w
v206
$FILE
|
wc
-l
)
echo
v207
$(
grep
-w
v207
$FILE
|
wc
-l
)
echo
v208
$(
grep
-w
v208
$FILE
|
wc
-l
)
echo
v209
$(
grep
-w
v209
$FILE
|
wc
-l
)
echo
v210
$(
grep
-w
v210
$FILE
|
wc
-l
)
echo
v211
$(
grep
-w
v211
$FILE
|
wc
-l
)
echo
v212
$(
grep
-w
v212
$FILE
|
wc
-l
)
echo
v213
$(
grep
-w
v213
$FILE
|
wc
-l
)
echo
v214
$(
grep
-w
v214
$FILE
|
wc
-l
)
echo
v215
$(
grep
-w
v215
$FILE
|
wc
-l
)
echo
v216
$(
grep
-w
v216
$FILE
|
wc
-l
)
echo
v217
$(
grep
-w
v217
$FILE
|
wc
-l
)
echo
v218
$(
grep
-w
v218
$FILE
|
wc
-l
)
echo
v219
$(
grep
-w
v219
$FILE
|
wc
-l
)
echo
v220
$(
grep
-w
v220
$FILE
|
wc
-l
)
echo
v221
$(
grep
-w
v221
$FILE
|
wc
-l
)
echo
v222
$(
grep
-w
v222
$FILE
|
wc
-l
)
echo
v223
$(
grep
-w
v223
$FILE
|
wc
-l
)
echo
v224
$(
grep
-w
v224
$FILE
|
wc
-l
)
echo
v225
$(
grep
-w
v225
$FILE
|
wc
-l
)
echo
v226
$(
grep
-w
v226
$FILE
|
wc
-l
)
echo
v227
$(
grep
-w
v227
$FILE
|
wc
-l
)
echo
v228
$(
grep
-w
v228
$FILE
|
wc
-l
)
echo
v229
$(
grep
-w
v229
$FILE
|
wc
-l
)
echo
v230
$(
grep
-w
v230
$FILE
|
wc
-l
)
echo
v231
$(
grep
-w
v231
$FILE
|
wc
-l
)
echo
v232
$(
grep
-w
v232
$FILE
|
wc
-l
)
echo
v233
$(
grep
-w
v233
$FILE
|
wc
-l
)
echo
v234
$(
grep
-w
v234
$FILE
|
wc
-l
)
echo
v235
$(
grep
-w
v235
$FILE
|
wc
-l
)
echo
v236
$(
grep
-w
v236
$FILE
|
wc
-l
)
echo
v237
$(
grep
-w
v237
$FILE
|
wc
-l
)
echo
v238
$(
grep
-w
v238
$FILE
|
wc
-l
)
echo
v239
$(
grep
-w
v239
$FILE
|
wc
-l
)
echo
v240
$(
grep
-w
v240
$FILE
|
wc
-l
)
echo
v241
$(
grep
-w
v241
$FILE
|
wc
-l
)
echo
v242
$(
grep
-w
v242
$FILE
|
wc
-l
)
echo
v243
$(
grep
-w
v243
$FILE
|
wc
-l
)
echo
v244
$(
grep
-w
v244
$FILE
|
wc
-l
)
echo
v245
$(
grep
-w
v245
$FILE
|
wc
-l
)
echo
v246
$(
grep
-w
v246
$FILE
|
wc
-l
)
echo
v247
$(
grep
-w
v247
$FILE
|
wc
-l
)
echo
v248
$(
grep
-w
v248
$FILE
|
wc
-l
)
echo
v249
$(
grep
-w
v249
$FILE
|
wc
-l
)
echo
v250
$(
grep
-w
v250
$FILE
|
wc
-l
)
echo
v251
$(
grep
-w
v251
$FILE
|
wc
-l
)
echo
v252
$(
grep
-w
v252
$FILE
|
wc
-l
)
echo
v253
$(
grep
-w
v253
$FILE
|
wc
-l
)
echo
v254
$(
grep
-w
v254
$FILE
|
wc
-l
)
echo
v255
$(
grep
-w
v255
$FILE
|
wc
-l
)
script/profile_reduce_no_index.sh
View file @
cab8f2e5
#!/bin/bash
#!/bin/bash
PRECISION
=
##--half
PRECISION
=
##PRECISION=--half
##PRECISION=--double
if
test
-n
$PRECISION
&&
test
"
$PRECISION
"
=
"--half"
;
then
if
test
-n
$PRECISION
&&
test
"
$PRECISION
"
=
"--half"
;
then
CTYPE
=
"-C 1"
AC
CTYPE
=
"-C 1"
else
else
CTYPE
=
""
AC
CTYPE
=
""
fi
fi
WTYPE
=
driver
=
"./bin/ckProfiler"
VERIFY
=
"-v
$1
"
INIT
=
$2
NREPEAT
=
$3
if
[
$#
-ge
1
]
;
then
NREPEAT
=
$1
else
NREPEAT
=
1
fi
Operation
=
7
#### 0 - ADD, 5 - AVG, 7 - NORM2
Operations
=
"0 5 7"
## for generic validation
## for generic validation
for
op
in
$Operation
;
do
for
op
in
$Operation
s
;
do
set
-x
set
-x
./bin/ckProfiler reduce
$PRECISION
-D
64,4,280,82
-R
0
-O
$op
$CTYPE
-v
1 1
$NREPEAT
####### datatype layout reduce dims op acctype verify init repeats
./bin/ckProfiler reduce
$PRECISION
-D
4,64,280,82
-R
0
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
280,4,64,82
-R
0
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
1
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
64,4,280,82
-R
0,1,2
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
4,64,280,82
-R
0,1,2
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
3
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
64,280,82,4
-R
0,1,2
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
700,8192
-R
1
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
1,2,3
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
700,1024
-R
1
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0,2,3
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
700,4
-R
1
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0,1,3
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
256,22960
-R
0
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
256,22960
-R
1
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
4,1469440
-R
0
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
4,1469440
-R
1
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
set
+x
set
+x
done
done
Operation
=
5
#### 0 - ADD, 5 - AVG, 7 - NORM2
Operations
=
5
## for performance evaluation (resnet50 NHWC => C)
## for performance evaluation (resnet50 NHWC => C)
for
op
in
$Operation
;
do
for
op
in
$Operation
s
;
do
set
-x
set
-x
./bin/ckProfiler reduce
$PRECISION
-D
256,14,14,1024
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
####### datatype layout reduce dims op acctype verify init repeats
./bin/ckProfiler reduce
$PRECISION
-D
256,28,28,128
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,14,14,1024
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,58,58,128
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,28,28,128
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,7,7,2048
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,58,58,128
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,14,14,256
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,7,7,2048
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,30,30,256
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,14,14,256
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,56,56,256
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,30,30,256
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,16,16,512
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,56,56,256
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,28,28,512
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,16,16,512
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,7,7,512
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,28,28,512
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,56,56,64
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,7,7,512
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,230,230,3
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,56,56,64
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,14,14,1024
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,230,230,3
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,28,28,128
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,14,14,1024
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,58,58,128
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,28,28,128
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,7,7,2048
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,58,58,128
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,14,14,256
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,7,7,2048
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,30,30,256
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,14,14,256
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,56,56,256
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,30,30,256
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,16,16,512
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,56,56,256
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,28,28,512
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,16,16,512
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,7,7,512
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,28,28,512
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,56,56,64
-R
0,1,2
-O
$op
$CTYPE
$WTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,7,7,512
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
128,56,56,64
-R
0,1,2
-O
$op
$ACCTYPE
$VERIFY
$INIT
$NREPEAT
set
+x
set
+x
done
done
script/profile_reduce_with_index.sh
View file @
cab8f2e5
#!/bin/bash
#!/bin/bash
PRECISION
=
##--half
PRECISION
=
##PRECISION=--half
##PRECISION=--double
if
[
$#
-ge
1
]
;
then
driver
=
"./bin/ckProfiler"
NREPEAT
=
$1
else
NREPEAT
=
1
fi
Operation
=
4
VERIFY
=
"-v
$1
"
INIT
=
$2
NREPEAT
=
$3
LENGTHS
=
64,4,280,82
#### 2 - MIN, 3 - MAX, 4 - AMAX
Operations
=
"2 4"
## for generic validation
## for generic validation
for
op
in
$Operation
;
do
for
op
in
$Operation
s
;
do
for
use_idx
in
0 1
;
do
for
use_idx
in
0 1
;
do
set
-x
set
-x
./bin/ckProfiler reduce
$PRECISION
-D
64,4,280,82
-R
0
-O
$op
$CTYPE
-v
1 1
$NREPEAT
####### datatype layout reduce dims op use index verify init repeats
./bin/ckProfiler reduce
$PRECISION
-D
4,64,280,82
-R
0
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
280,4,64,82
-R
0
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
1
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
64,4,280,82
-R
0,1,2
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
4,64,280,82
-R
0,1,2
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
3
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
64,280,82,4
-R
0,1,2
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
700,8192
-R
1
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
1,2,3
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
700,1024
-R
1
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0,2,3
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
700,4
-R
1
-O
$op
$CTYPE
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
64,4,280,82
-R
0,1,3
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
256,22960
-R
0
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
256,22960
-R
1
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
4,1469440
-R
0
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
4,1469440
-R
1
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
set
+x
set
+x
done
done
done
done
Operations
=
2
## for performance evaluation (resnet50 NHWC => C)
## for performance evaluation (resnet50 NHWC => C)
for
op
in
$Operation
;
do
for
op
in
$Operation
s
;
do
for
use_idx
in
0 1
;
do
for
use_idx
in
0 1
;
do
set
-x
set
-x
./bin/ckProfiler reduce
$PRECISION
-D
256,14,14,1024
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
####### datatype layout reduce dims op use index verify init repeats
./bin/ckProfiler reduce
$PRECISION
-D
256,28,28,128
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,14,14,1024
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,58,58,128
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,28,28,128
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,7,7,2048
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,58,58,128
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,14,14,256
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,7,7,2048
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,30,30,256
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,14,14,256
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,56,56,256
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,30,30,256
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,16,16,512
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,56,56,256
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,28,28,512
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,16,16,512
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,7,7,512
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,28,28,512
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,56,56,64
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,7,7,512
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
256,230,230,3
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,56,56,64
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,14,14,1024
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
256,230,230,3
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,28,28,128
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,14,14,1024
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,58,58,128
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,28,28,128
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,7,7,2048
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,58,58,128
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,14,14,256
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,7,7,2048
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,30,30,256
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,14,14,256
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,56,56,256
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,30,30,256
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,16,16,512
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,56,56,256
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,28,28,512
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,16,16,512
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,7,7,512
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,28,28,512
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
./bin/ckProfiler reduce
$PRECISION
-D
128,56,56,64
-R
0,1,2
-O
$op
-I
$use_idx
-v
1 1
$NREPEAT
$driver
reduce
$PRECISION
-D
128,7,7,512
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
$driver
reduce
$PRECISION
-D
128,56,56,64
-R
0,1,2
-O
$op
-I
$use_idx
$VERIFY
$INIT
$NREPEAT
set
+x
set
+x
done
done
done
done
...
...
test/space_filling_curve/space_filling_curve.cpp
View file @
cab8f2e5
...
@@ -95,13 +95,13 @@ void traverse_using_space_filling_curve()
...
@@ -95,13 +95,13 @@ void traverse_using_space_filling_curve()
make_tuple
(
12
,
2
,
6
),
make_tuple
(
12
,
2
,
6
),
make_tuple
(
12
,
0
,
6
));
make_tuple
(
12
,
0
,
6
));
constexpr
index_t
num_access
es
=
SpaceFillingCurve
::
GetNumOfAccess
();
constexpr
index_t
num_access
=
SpaceFillingCurve
::
GetNumOfAccess
();
static_assert
(
num_access
es
==
reduce_on_sequence
(
TensorLengths
{}
/
ScalarsPerAccess
{},
static_assert
(
num_access
==
reduce_on_sequence
(
TensorLengths
{}
/
ScalarsPerAccess
{},
math
::
multiplies
{},
math
::
multiplies
{},
Number
<
1
>
{}));
Number
<
1
>
{}));
static_for
<
1
,
num_access
es
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
1
,
num_access
,
1
>
{}([
&
](
auto
i
)
{
constexpr
auto
idx_curr
=
SpaceFillingCurve
::
GetIndex
(
i
);
constexpr
auto
idx_curr
=
SpaceFillingCurve
::
GetIndex
(
i
);
static_assert
(
idx_curr
[
I0
]
==
expected
[
i
][
I0
]);
static_assert
(
idx_curr
[
I0
]
==
expected
[
i
][
I0
]);
...
@@ -115,7 +115,7 @@ void traverse_using_space_filling_curve()
...
@@ -115,7 +115,7 @@ void traverse_using_space_filling_curve()
static_assert
(
backward_step
[
I2
]
==
expected_step
[
I2
]);
static_assert
(
backward_step
[
I2
]
==
expected_step
[
I2
]);
});
});
static_for
<
0
,
num_access
es
-
1
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
num_access
-
1
,
1
>
{}([
&
](
auto
i
)
{
constexpr
auto
idx_curr
=
SpaceFillingCurve
::
GetIndex
(
i
);
constexpr
auto
idx_curr
=
SpaceFillingCurve
::
GetIndex
(
i
);
static_assert
(
idx_curr
[
I0
]
==
expected
[
i
][
I0
]);
static_assert
(
idx_curr
[
I0
]
==
expected
[
i
][
I0
]);
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment