Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
31d1913f
".github/git@developer.sourcefind.cn:Fzc7075/nunchaku.git" did not exist on "b914a34798d06802db39ad9c1c60d511ea66f02c"
Unverified
Commit
31d1913f
authored
Jun 02, 2022
by
Shaojie WANG
Committed by
GitHub
Jun 02, 2022
Browse files
Merge branch 'develop' into fix_ctile_err_for_conv2d_fwd_bias_relu_add
parents
b5f1f3eb
86185bd7
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
12 deletions
+14
-12
profiler/include/profile_gemm_reduce_impl.hpp
profiler/include/profile_gemm_reduce_impl.hpp
+2
-2
profiler/include/profile_reduce_impl.hpp
profiler/include/profile_reduce_impl.hpp
+12
-10
No files found.
profiler/include/profile_gemm_reduce_impl.hpp
View file @
31d1913f
...
...
@@ -165,8 +165,8 @@ bool profile_gemm_reduce_impl(int do_verification,
for
(
int
m
=
0
;
m
<
M
;
++
m
)
{
float
d0_acc
=
d0_reduce_op
.
Get
ReductionZero
Val
();
float
d1_acc
=
d1_reduce_op
.
Get
ReductionZero
Val
();
float
d0_acc
=
d0_reduce_op
.
Get
Identity
Val
ue
();
float
d1_acc
=
d1_reduce_op
.
Get
Identity
Val
ue
();
for
(
int
n
=
0
;
n
<
N
;
++
n
)
{
...
...
profiler/include/profile_reduce_impl.hpp
View file @
31d1913f
...
...
@@ -138,7 +138,6 @@ bool profile_reduce_impl_impl(bool do_verification,
{
using
namespace
ck
::
tensor_operation
::
device
;
using
namespace
ck
::
tensor_operation
::
device
::
device_reduce_instance
;
using
namespace
ck
::
host_reduce
;
using
ck
::
host_common
::
dumpBufferToFile
;
constexpr
bool
op_support_indices
=
...
...
@@ -261,15 +260,17 @@ bool profile_reduce_impl_impl(bool do_verification,
float
best_avg_time
=
0
;
float
best_gb_per_sec
=
0
;
using
InElementwiseOperation
_0
=
using
InElementwiseOperation
=
typename
reduce_unary_operator
<
AccDataType
,
ReduceOpId
,
true
,
true
>::
InElementwiseOperation
;
using
AccElementwiseOperation
_0
=
using
AccElementwiseOperation
=
typename
reduce_unary_operator
<
AccDataType
,
ReduceOpId
,
true
,
true
>::
AccElementwiseOperation
;
using
ReduceOperation
=
typename
reduce_binary_operator
<
AccDataType
,
ReduceOpId
>::
opType
;
using
DeviceReduceInstPtr0
=
DeviceReducePtr
<
InElementwiseOperation
_0
,
AccElementwiseOperation
_0
>
;
DeviceReducePtr
<
InElementwiseOperation
,
AccElementwiseOperation
>
;
std
::
vector
<
DeviceReduceInstPtr0
>
reduce0_ptrs
;
...
...
@@ -313,7 +314,9 @@ bool profile_reduce_impl_impl(bool do_verification,
ReductionHost
<
InDataType
,
AccDataType
,
OutDataType
,
ReduceOpId
,
ReduceOperation
,
InElementwiseOperation
,
AccElementwiseOperation
,
Rank
,
NumReduceDim
,
PropagateNan
,
...
...
@@ -337,9 +340,8 @@ bool profile_reduce_impl_impl(bool do_verification,
for
(
auto
&
reduce_ptr
:
reduce0_ptrs
)
{
InElementwiseOperation_0
in_elementwise_op_0
(
static_cast
<
int32_t
>
(
reduce_total_length
));
AccElementwiseOperation_0
acc_elementwise_op_0
(
static_cast
<
int32_t
>
(
reduce_total_length
));
InElementwiseOperation
in_elementwise_op
(
static_cast
<
int32_t
>
(
reduce_total_length
));
AccElementwiseOperation
acc_elementwise_op
(
static_cast
<
int32_t
>
(
reduce_total_length
));
auto
argument_ptr
=
reduce_ptr
->
MakeArgumentPointer
(
i_inLengths
,
i_inStrides
,
...
...
@@ -352,8 +354,8 @@ bool profile_reduce_impl_impl(bool do_verification,
nullptr
,
out_dev
.
GetDeviceBuffer
(),
out_indices_dev
.
GetDeviceBuffer
(),
in_elementwise_op
_0
,
acc_elementwise_op
_0
);
in_elementwise_op
,
acc_elementwise_op
);
if
(
!
reduce_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
continue
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment