gaoqiong / composable_kernel · Commits · 95a83c6e

Commit 95a83c6e, authored Nov 18, 2022 by Adam Osewski

    Merge remote-tracking branch 'origin/develop' into wavelet_model

Parents: 5b7c2432, 892a8d76
Changes: 618
Showing 20 changed files with 468 additions and 87 deletions (+468 -87)
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp  +38 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp  +22 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp  +40 -0
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp  +8 -0
library/include/ck/library/utility/algorithm.hpp  +43 -0
library/include/ck/library/utility/check_err.hpp  +46 -43
library/include/ck/library/utility/convolution_parameter.hpp  +6 -8
library/include/ck/library/utility/fill.hpp  +14 -3
library/include/ck/library/utility/host_reduction.hpp  +3 -7
library/include/ck/library/utility/host_tensor.hpp  +28 -26
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_f32_f32_rank4_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
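A minimal sketch of how a caller typically consumes a factory declaration like the one above: link against the instance library, let the factory fill the vector, and pick an instance at runtime. It assumes the F32 and PassThrough aliases resolve to float and ck::tensor_operation::element_wise::PassThrough (as set up by the factory header); the main() wrapper is purely illustrative.

#include <cstdio>
#include <vector>

#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp"

int main()
{
    using namespace ck::tensor_operation::device;            // DeviceSoftmaxPtr
    using namespace ck::tensor_operation::device::instance;  // the factory declared above
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;

    // Vector of polymorphic device-op pointers, populated by the factory.
    std::vector<DeviceSoftmaxPtr<float, float, float, PassThrough, PassThrough, 4>> instances;
    add_device_softmax_f32_f32_rank4_reduce2_instances(instances);

    // A real caller would build an argument for its tensor shapes and keep the
    // first instance that reports the argument as supported.
    std::printf("rank-4 / reduce-2 f32 softmax instances: %zu\n", instances.size());
    return 0;
}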
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_f32_f32_rank4_reduce3_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_f32_f32_rank4_reduce4_instances(
    std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <tuple>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

template <index_t Rank, index_t Reduce>
using device_softmax_f32_f32_instances = std::tuple<
    // clang-format off
    // InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8,  32, 1,  8, 1, 1, 1>, // fallback kernel
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8,  32, 1,  8, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 4,  64, 1,  8, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1,  8, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 16, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 32, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1,  8, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 4>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 4>,
    // Reduction on middle dimensions
    // InSrcVectorDim is 0 since we want to coalesce reads on M dimension
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8,  32, 8,  4, 0, 1, 1>,
    DeviceSoftmaxImpl<F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8,  32, 8,  4, 0, 4, 4>
    // clang-format on
    >;

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
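The tuple above is only a compile-time list of tile configurations; turning it into the runtime instance list amounts to constructing one object per element and storing it behind the polymorphic pointer type. The helper below is a sketch of that expansion, not the library's own registration helper, and it assumes the configuration types are default-constructible and copyable.

#include <memory>
#include <tuple>
#include <vector>

// InstanceTuple: e.g. device_softmax_f32_f32_instances<4, 2>
// BasePtr:       whatever DeviceSoftmaxPtr aliases (a smart pointer to the base op)
template <typename InstanceTuple, typename BasePtr>
void register_instances_sketch(std::vector<BasePtr>& instances)
{
    std::apply(
        [&](auto... configs) {
            // One push_back per tuple element; each element is a distinct
            // DeviceSoftmaxImpl specialization.
            (instances.push_back(std::make_unique<decltype(configs)>(configs)), ...);
        },
        InstanceTuple{});
}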
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_instances(
    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 3>>& instances);

void add_device_softmax_i8_i8_rank4_instances(
    std::vector<DeviceSoftmaxPtr<F16, F32, F16, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce1.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_reduce1_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce2.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank3_reduce3.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank3_reduce3_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce1.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank4_reduce1_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce2.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank4_reduce2_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce3.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank4_reduce3_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_rank4_reduce4.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>

#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_softmax_i8_i8_rank4_reduce4_instances(
    std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance_type.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <tuple>

#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

template <index_t Rank, index_t Reduce>
using device_softmax_i8_i8_instances = std::tuple<
    // clang-format off
    // InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
    // fallback kernel
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  8,  32,  1, 16, 1,  1,  1>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  8,  32,  1, 16, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  4,  64,  1, 16, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  2, 128,  1, 16, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  2, 128,  1, 32, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  2, 128,  1, 64, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  1, 256,  1, 16, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  1, 256,  1, 32, 1, 16, 16>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  1, 256,  1, 64, 1, 16, 16>,
    // Reduction on middle dimensions
    // InSrcVectorDim is 0 since we want to coalesce reads on M dimension
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256,  8,  32,  8,  8, 0,  1,  1>,
    DeviceSoftmaxImpl<I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 32,   8, 32,  8, 0, 16,  8>
    // clang-format on
    >;

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp"
library/include/ck/library/utility/algorithm.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <algorithm>
#include <iterator>
#include <type_traits>
#include <utility>

namespace ck {
namespace ranges {

template <typename InputRange, typename OutputIterator>
auto copy(InputRange&& range, OutputIterator iter)
    -> decltype(std::copy(std::begin(std::forward<InputRange>(range)),
                          std::end(std::forward<InputRange>(range)),
                          iter))
{
    return std::copy(std::begin(std::forward<InputRange>(range)),
                     std::end(std::forward<InputRange>(range)),
                     iter);
}

template <typename T, typename OutputRange>
auto fill(OutputRange&& range, const T& init)
    -> std::void_t<decltype(std::fill(std::begin(std::forward<OutputRange>(range)),
                                      std::end(std::forward<OutputRange>(range)),
                                      init))>
{
    std::fill(std::begin(std::forward<OutputRange>(range)),
              std::end(std::forward<OutputRange>(range)),
              init);
}

template <typename InputRange, typename OutputIterator, typename UnaryOperation>
auto transform(InputRange&& range, OutputIterator iter, UnaryOperation unary_op)
    -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
{
    return std::transform(std::begin(range), std::end(range), iter, unary_op);
}

} // namespace ranges
} // namespace ck
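A quick illustration of the three helpers above using plain standard containers; they only need std::begin/std::end to be valid for the argument, so no CK-specific types are involved.

#include <vector>

#include "ck/library/utility/algorithm.hpp"

void ranges_demo()
{
    std::vector<int> src{1, 2, 3, 4};
    std::vector<int> dst(src.size());

    ck::ranges::copy(src, dst.begin());                                    // dst = {1, 2, 3, 4}
    ck::ranges::transform(dst, dst.begin(), [](int x) { return 2 * x; });  // dst = {2, 4, 6, 8}
    ck::ranges::fill(src, 0);                                              // src = {0, 0, 0, 0}
}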
library/include/ck/library/utility/check_err.hpp

The check_err overloads are generalized from std::vector<T> / span<const T> parameters to arbitrary range pairs: element types are taken from ranges::range_value_t, elements are read through std::next(std::begin(...), i), and the dedicated span-based and vector-forwarding overloads are folded into the generic ones. Reconstructed hunks (new version shown):

@@ -15,18 +15,22 @@
#include "ck/ck.hpp"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type.hpp"
#include "ck/host_utility/io.hpp"
#include "ck/library/utility/ranges.hpp"   // added; "ck/utility/span.hpp" is dropped

namespace ck {
namespace utils {

template <typename Range, typename RefRange>
typename std::enable_if<
    std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
        std::is_floating_point_v<ranges::range_value_t<Range>> &&
        !std::is_same_v<ranges::range_value_t<Range>, half_t>,
    bool>::type
check_err(const Range& out,
          const RefRange& ref,
          const std::string& msg = "Error: Incorrect results!",
          double rtol            = 1e-5,
          double atol            = 3e-6)

@@ -44,15 +48,17 @@ check_err(const std::vector<T>& out,
    double max_err = std::numeric_limits<double>::min();
    for(std::size_t i = 0; i < ref.size(); ++i)
    {
        const double o = *std::next(std::begin(out), i);
        const double r = *std::next(std::begin(ref), i);
        err            = std::abs(o - r);
        if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
        {
            max_err = err > max_err ? err : max_err;
            err_count++;
            if(err_count < 5)
            {
                std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
                          << "] != ref[" << i << "]: " << o << " != " << r << std::endl;
            }
            res = false;
        }

@@ -64,10 +70,13 @@ check_err(const std::vector<T>& out,
    return res;
}

template <typename Range, typename RefRange>
typename std::enable_if<
    std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
        std::is_same_v<ranges::range_value_t<Range>, bhalf_t>,
    bool>::type
check_err(const Range& out,
          const RefRange& ref,
          const std::string& msg = "Error: Incorrect results!",
          double rtol            = 1e-3,
          double atol            = 1e-3)

@@ -86,9 +95,9 @@ check_err(const std::vector<T>& out,
        const double o = type_convert<float>(*std::next(std::begin(out), i));
        const double r = type_convert<float>(*std::next(std::begin(ref), i));
        err            = std::abs(o - r);
        if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
        {
            max_err = err > max_err ? err : max_err;

@@ -108,10 +117,13 @@ check_err(const std::vector<T>& out,
    return res;
}

template <typename Range, typename RefRange>
typename std::enable_if<
    std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
        std::is_same_v<ranges::range_value_t<Range>, half_t>,
    bool>::type
check_err(const Range& out,
          const RefRange& ref,
          const std::string& msg = "Error: Incorrect results!",
          double rtol            = 1e-3,
          double atol            = 1e-3)

@@ -126,12 +138,12 @@ check_err(span<const T> out,
    bool res{true};
    int err_count  = 0;
    double err     = 0;
    double max_err = std::numeric_limits<ranges::range_value_t<Range>>::min();
    for(std::size_t i = 0; i < ref.size(); ++i)
    {
        const double o = type_convert<float>(*std::next(std::begin(out), i));
        const double r = type_convert<float>(*std::next(std::begin(ref), i));
        err            = std::abs(o - r);
        if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
        {
            max_err = err > max_err ? err : max_err;

@@ -151,26 +163,17 @@ check_err(span<const T> out,
    return res;
}

(The previous std::vector<half_t> overload, which forwarded to the span-based version via
return check_err(span<const T>{out}, span<const T>{ref}, msg, rtol, atol);, is removed.)

template <typename Range, typename RefRange>
std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
                  std::is_integral_v<ranges::range_value_t<Range>> &&
                  !std::is_same_v<ranges::range_value_t<Range>, bhalf_t>)
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
                     || std::is_same_v<ranges::range_value_t<Range>, int4_t>
#endif
                     ,
                 bool>
check_err(const Range& out,
          const RefRange& ref,
          const std::string& msg = "Error: Incorrect results!",
          double                 = 0,
          double atol            = 0)

@@ -188,9 +191,9 @@ check_err(const std::vector<T>& out,
    int64_t max_err = std::numeric_limits<int64_t>::min();
    for(std::size_t i = 0; i < ref.size(); ++i)
    {
        const int64_t o = *std::next(std::begin(out), i);
        const int64_t r = *std::next(std::begin(ref), i);
        err             = std::abs(o - r);
        if(err > atol)
        {
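At a call site the relaxed signature means out and ref no longer have to be the same container type, only ranges with matching element types. A small example with standard containers, assuming ranges::range_value_t resolves for them; the tolerances shown are the header's defaults.

#include <array>
#include <vector>

#include "ck/library/utility/check_err.hpp"

bool results_match(const std::vector<float>& device_out, const std::array<float, 16>& host_ref)
{
    // Floating-point overload: relative/absolute tolerance comparison.
    return ck::utils::check_err(device_out, host_ref, "Error: Incorrect results!", 1e-5, 3e-6);
}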
library/include/ck/library/utility/convolution_parameter.hpp

The spatial-length products switch from std::accumulate over an explicit iterator pair (with static_cast<std::size_t>(1) and std::multiplies<std::size_t>) to ck::accumulate_n, pulled in through the new numeric.hpp include. Reconstructed hunks (new version shown):

@@ -10,6 +10,8 @@
#include "ck/ck.hpp"
#include "ck/library/utility/numeric.hpp"   // added

namespace ck {
namespace utils {
namespace conv {

@@ -55,10 +57,8 @@ struct ConvParam
        // sizeof(InDataType) * (G * N * C * <input spatial lengths product>) +
        return sizeof(InDataType) *
               (G_ * N_ * C_ *
                ck::accumulate_n<std::size_t>(
                    std::begin(input_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>()));
    }

    template <typename WeiDataType>

@@ -67,10 +67,8 @@ struct ConvParam
        // sizeof(WeiDataType) * (G * K * C * <filter spatial lengths product>) +
        return sizeof(WeiDataType) *
               (G_ * K_ * C_ *
                ck::accumulate_n<std::size_t>(
                    std::begin(filter_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>()));
    }

    template <typename OutDataType>
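ck::accumulate_n<T>(first, n, init, op) comes from the newly included numeric.hpp and is used above as a count-based replacement for the iterator-pair std::accumulate. A minimal stand-in with the same behaviour, for reference (the name accumulate_n_sketch is hypothetical):

#include <cstddef>
#include <functional>
#include <iterator>
#include <numeric>

template <typename T, typename ForwardIterator, typename BinaryOp>
T accumulate_n_sketch(ForwardIterator first, std::size_t n, T init, BinaryOp op)
{
    // Fold op over the first n elements, starting from init.
    return std::accumulate(first, std::next(first, n), init, op);
}

// Usage mirroring the call above: product of the first num_dim_spatial_ lengths.
// accumulate_n_sketch<std::size_t>(std::begin(lengths), num_dim_spatial_, 1, std::multiplies<>{});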
library/include/ck/library/utility/fill.hpp

The range-based operator() of FillUniformDistribution becomes const (its SFINAE check now uses std::declval<const FillUniformDistribution&>()), and FillUniformDistributionIntegerValue gains the same const range-based operator(). Reconstructed hunks (new version shown):

@@ -30,9 +30,10 @@ struct FillUniformDistribution
    }

    template <typename ForwardRange>
    auto operator()(ForwardRange&& range) const
        -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
            std::begin(std::forward<ForwardRange>(range)),
            std::end(std::forward<ForwardRange>(range))))>
    {
        (*this)(std::begin(std::forward<ForwardRange>(range)),
                std::end(std::forward<ForwardRange>(range)));

@@ -72,6 +73,16 @@ struct FillUniformDistributionIntegerValue
        std::generate(
            first, last, [&dis, &gen]() { return ck::type_convert<T>(std::round(dis(gen))); });
    }

    template <typename ForwardRange>
    auto operator()(ForwardRange&& range) const
        -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
            std::begin(std::forward<ForwardRange>(range)),
            std::end(std::forward<ForwardRange>(range))))>
    {
        (*this)(std::begin(std::forward<ForwardRange>(range)),
                std::end(std::forward<ForwardRange>(range)));
    }
};

template <typename T>
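The practical effect of the const qualification: a temporary filler can be applied directly to a whole container. In this sketch the two-value {lower, upper} construction and the ck::utils namespace are assumptions about the surrounding header.

#include <vector>

#include "ck/library/utility/fill.hpp"

void randomize(std::vector<float>& buf)
{
    // The range overload dispatches to the iterator-pair operator() internally.
    ck::utils::FillUniformDistribution<float>{-1.f, 1.f}(buf);
}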
library/include/ck/library/utility/host_reduction.hpp

ReductionHost now receives the invariant and reduce dimension lists as fixed-size std::array constructor parameters instead of std::vector members; the invariantDims/reduceDims members and their assignments are removed. Reconstructed hunks (new version shown):

@@ -96,10 +96,9 @@ struct ReductionHost
    static constexpr int NumInvariantDim = Rank - NumReduceDim;

    std::vector<size_t> outStrides;

    IndexDataType divider;
    std::array<size_t, NumReduceDim> reduceLengths;
    std::array<size_t, NumReduceDim> reduceStrides;
    std::array<size_t, NumInvariantDim> invariantLengths;

@@ -110,15 +109,12 @@ struct ReductionHost
    ReductionHost(HostTensorDescriptor& inDesc,
                  HostTensorDescriptor& outDesc,
                  const std::array<int, NumInvariantDim> invariantDims,
                  const std::array<int, NumReduceDim> reduceDims)
    {
        // this->outLengths = to_int_vector(outDesc.GetLengths());
        this->outStrides = outDesc.GetStrides();

        int product = 1;

        for(int i = 0; i < NumReduceDim; i++)
library/include/ck/library/utility/host_tensor.hpp

HostTensorDescriptor and Tensor are generalized to ranges: the constructor SFINAE checks now use ck::ranges::range_value_t, Tensor accepts initializer lists and arbitrary length/stride ranges, CopyAsType uses ck::ranges::transform, and SetZero uses ck::ranges::fill. Reconstructed hunks (new version shown):

@@ -14,6 +14,9 @@
#include "ck/utility/data_type.hpp"
#include "ck/utility/span.hpp"
#include "ck/library/utility/algorithm.hpp"   // added
#include "ck/library/utility/ranges.hpp"      // added

template <typename Range>
std::ostream& LogRange(std::ostream& os, Range&& range, std::string delim)
{

@@ -84,10 +87,10 @@ struct HostTensorDescriptor
        this->CalculateStrides();
    }

    template <typename Lengths,
              typename = std::enable_if_t<
                  std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t>>>
    HostTensorDescriptor(const Lengths& lens) : mLens(lens.begin(), lens.end())
    {
        this->CalculateStrides();
    }

@@ -102,13 +105,12 @@ struct HostTensorDescriptor
    {
    }

    template <typename Lengths,
              typename Strides,
              typename = std::enable_if_t<
                  std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> &&
                  std::is_convertible_v<ck::ranges::range_value_t<Strides>, std::size_t>>>
    HostTensorDescriptor(const Lengths& lens, const Strides& strides)
        : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
    {
    }

@@ -244,14 +246,20 @@ struct Tensor
    {
    }

    template <typename X, typename Y>
    Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
        : mDesc(lens, strides), mData(mDesc.GetElementSpaceSize())
    {
    }

    template <typename Lengths>
    Tensor(const Lengths& lens) : mDesc(lens), mData(mDesc.GetElementSpaceSize())
    {
    }

    template <typename Lengths, typename Strides>
    Tensor(const Lengths& lens, const Strides& strides)
        : mDesc(lens, strides), mData(GetElementSpaceSize())
    {
    }

@@ -261,10 +269,10 @@ struct Tensor
    Tensor<OutT> CopyAsType() const
    {
        Tensor<OutT> ret(mDesc);

        ck::ranges::transform(
            mData, ret.mData.begin(), [](auto value) { return ck::type_convert<OutT>(value); });

        return ret;
    }

@@ -294,13 +302,7 @@ struct Tensor
    std::size_t GetElementSpaceSizeInBytes() const { return sizeof(T) * GetElementSpaceSize(); }

    void SetZero() { ck::ranges::fill<T>(mData, 0); }

    template <typename F>
    void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank)
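The new constructors in use: braced lengths/strides go through the initializer_list overloads, and any range whose elements convert to std::size_t works for the generic ones. The shapes below are arbitrary; Tensor and HostTensorDescriptor are the host-side helpers from this header.

#include <array>
#include <cstddef>
#include <vector>

#include "ck/library/utility/host_tensor.hpp"

void host_tensor_demo()
{
    Tensor<float> a({4, 8}, {8, 1});  // initializer_list lengths and strides

    std::vector<std::size_t> lens{2, 3, 4};
    std::array<std::size_t, 3> strides{12, 4, 1};
    Tensor<float> b(lens, strides);   // generic range overload

    b.SetZero();                      // now ck::ranges::fill under the hood
    auto c = b.CopyAsType<double>();  // now ck::ranges::transform under the hood
    (void)a;
    (void)c;
}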