Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
24af0144
Unverified
Commit
24af0144
authored
Nov 12, 2022
by
Po Yen Chen
Committed by
GitHub
Nov 12, 2022
Browse files
Merge branch 'develop' into gemm_layernorm_welford
parents
961f5e9e
b79bbbc2
Changes
813
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
486 additions
and
235 deletions
+486
-235
test/normalization/test_layernorm2d_fp16.cpp
test/normalization/test_layernorm2d_fp16.cpp
+45
-0
test/normalization/test_layernorm2d_fp32.cpp
test/normalization/test_layernorm2d_fp32.cpp
+45
-0
test/reference_conv_fwd/reference_conv_fwd.cpp
test/reference_conv_fwd/reference_conv_fwd.cpp
+9
-9
test/softmax/CMakeLists.txt
test/softmax/CMakeLists.txt
+9
-9
test/softmax/test_softmax_fp16.cpp
test/softmax/test_softmax_fp16.cpp
+0
-34
test/softmax/test_softmax_fp32.cpp
test/softmax/test_softmax_fp32.cpp
+0
-34
test/softmax/test_softmax_int8.cpp
test/softmax/test_softmax_int8.cpp
+0
-30
test/softmax/test_softmax_interface.cpp
test/softmax/test_softmax_interface.cpp
+86
-0
test/softmax/test_softmax_rank3.cpp
test/softmax/test_softmax_rank3.cpp
+34
-0
test/softmax/test_softmax_rank4.cpp
test/softmax/test_softmax_rank4.cpp
+34
-0
test/softmax/test_softmax_ut_cases.inc
test/softmax/test_softmax_ut_cases.inc
+60
-0
test/softmax/test_softmax_util.hpp
test/softmax/test_softmax_util.hpp
+94
-112
test/space_filling_curve/space_filling_curve.cpp
test/space_filling_curve/space_filling_curve.cpp
+70
-7
No files found.
test/normalization/test_layernorm2d_fp16.cpp
0 → 100644
View file @
24af0144
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_layernorm_impl.hpp"
// Short element-type names used by the type list in this file.
using F16 = ck::half_t;
using F32 = float;

// Make ck::index_t usable without qualification.
using ck::index_t;
/// Typed gtest fixture that sweeps a fixed set of [N, D] shapes through
/// ck::profiler::profile_layernorm_impl and expects every run to report success.
///
/// @tparam Tuple std::tuple whose elements 0..4 are, in order:
///               XDataType, GammaDataType, BetaDataType, AccDataType, YDataType.
template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
    protected:
    using XDataType     = std::tuple_element_t<0, Tuple>;
    using GammaDataType = std::tuple_element_t<1, Tuple>;
    using BetaDataType  = std::tuple_element_t<2, Tuple>;
    using AccDataType   = std::tuple_element_t<3, Tuple>;
    using YDataType     = std::tuple_element_t<4, Tuple>;

    /// Runs the profiler once per shape. EXPECT_TRUE is non-fatal, so a failing
    /// shape is reported and the remaining shapes are still exercised.
    void Run()
    {
        // [N, D], reduce D
        const std::vector<std::vector<ck::index_t>> lengths = {
            {4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};

        // Bind by const reference: the shapes are only read, so there is no
        // need to copy each inner vector on every iteration.
        for(const auto& length : lengths)
        {
            // NOTE(review): the positional arguments are presumably
            // (do_verification, init_method, do_log, time_kernel, lengths) and the
            // trailing template argument the tensor rank -- confirm against
            // profile_layernorm_impl.hpp.
            const bool success = ck::profiler::profile_layernorm_impl<XDataType,
                                                                      GammaDataType,
                                                                      BetaDataType,
                                                                      AccDataType,
                                                                      YDataType,
                                                                      2>(
                true, 2, false, false, length);

            // Name the failing shape so a red test can be traced to its input.
            EXPECT_TRUE(success) << "layernorm failed for lengths = [" << length[0] << ", "
                                 << length[1] << "]";
        }
    }
};
// Type combinations the typed suite is instantiated for. Tuple element order:
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType
using KernelTypes = ::testing::Types<std::tuple<F16, F16, F16, F32, F16>>;

// Register the fixture with the type list above.
TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);

// Executes the fixture's shape sweep for every registered type combination.
TYPED_TEST(TestLayernorm2d, Test_FP16) { this->Run(); }
test/normalization/test_layernorm2d_fp32.cpp
0 → 100644
View file @
24af0144
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_layernorm_impl.hpp"
// Short element-type names; only F32 is referenced by this file's type list.
using F16 = ck::half_t;
using F32 = float;

// Make ck::index_t usable without qualification.
using ck::index_t;
/// Typed gtest fixture that sweeps a fixed set of [N, D] shapes through
/// ck::profiler::profile_layernorm_impl and expects every run to report success.
///
/// @tparam Tuple std::tuple whose elements 0..4 are, in order:
///               XDataType, GammaDataType, BetaDataType, AccDataType, YDataType.
template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
    protected:
    using XDataType     = std::tuple_element_t<0, Tuple>;
    using GammaDataType = std::tuple_element_t<1, Tuple>;
    using BetaDataType  = std::tuple_element_t<2, Tuple>;
    using AccDataType   = std::tuple_element_t<3, Tuple>;
    using YDataType     = std::tuple_element_t<4, Tuple>;

    /// Runs the profiler once per shape. EXPECT_TRUE is non-fatal, so a failing
    /// shape is reported and the remaining shapes are still exercised.
    void Run()
    {
        // [N, D], reduce D
        const std::vector<std::vector<ck::index_t>> lengths = {
            {4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};

        // Bind by const reference: the shapes are only read, so there is no
        // need to copy each inner vector on every iteration.
        for(const auto& length : lengths)
        {
            // NOTE(review): the positional arguments are presumably
            // (do_verification, init_method, do_log, time_kernel, lengths) and the
            // trailing template argument the tensor rank -- confirm against
            // profile_layernorm_impl.hpp.
            const bool success = ck::profiler::profile_layernorm_impl<XDataType,
                                                                      GammaDataType,
                                                                      BetaDataType,
                                                                      AccDataType,
                                                                      YDataType,
                                                                      2>(
                true, 2, false, false, length);

            // Name the failing shape so a red test can be traced to its input.
            EXPECT_TRUE(success) << "layernorm failed for lengths = [" << length[0] << ", "
                                 << length[1] << "]";
        }
    }
};
// Type combinations the typed suite is instantiated for. Tuple element order:
// XDataType, GammaDataType, BetaDataType, AccDataType, YDataType
using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, F32, F32>>;

// Register the fixture with the type list above.
TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);

// Executes the fixture's shape sweep for every registered type combination.
TYPED_TEST(TestLayernorm2d, Test_FP32) { this->Run(); }
test/reference_conv_fwd/reference_conv_fwd.cpp
View file @
24af0144
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/fill.hpp"
#include "ck/library/utility/fill.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
@@ -54,7 +55,7 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
...
@@ -54,7 +55,7 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
fill_input_op
(
input
.
begin
(),
input
.
end
());
fill_input_op
(
input
.
begin
(),
input
.
end
());
fill_weights_op
(
weights
.
begin
(),
weights
.
end
());
fill_weights_op
(
weights
.
begin
(),
weights
.
end
());
std
::
fill
(
host_output
.
begin
(),
host_output
.
end
(),
OutDataType
(
0.
f
)
)
;
ck
::
ranges
::
fill
<
OutDataType
>
(
host_output
,
0.
f
);
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
InDataType
,
InDataType
,
...
@@ -122,7 +123,7 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
...
@@ -122,7 +123,7 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
508.5};
508.5};
EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor
.mData
, ref_data, "Error: incorrect results!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
}
}
TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
...
@@ -149,7 +150,7 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
...
@@ -149,7 +150,7 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5};
1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5};
EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor
.mData
, ref_data, "Error: incorrect results!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
}
}
TEST(ReferenceConvolutionFWD, Conv1DGNWC)
TEST(ReferenceConvolutionFWD, Conv1DGNWC)
...
@@ -178,7 +179,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWC)
...
@@ -178,7 +179,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWC)
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor
.mData
, ref_data, "Error: incorrect results!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
}
}
TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
...
@@ -207,7 +208,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
...
@@ -207,7 +208,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor
.mData
, ref_data, "Error: incorrect results!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
}
}
TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
...
@@ -301,7 +302,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
...
@@ -301,7 +302,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4};
49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4};
EXPECT_TRUE(ck::utils::check_err(
EXPECT_TRUE(ck::utils::check_err(
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor2
.mData
, ref_data, "Error: incorrect results!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor2, ref_data, "Error: incorrect results!"));
}
}
#endif
#endif
...
@@ -340,8 +341,7 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
...
@@ -340,8 +341,7 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
ref_dims
,
"Error [case 1]: wrong output tensor dimensions!"
));
"Error [case 1]: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
,
ref_data
,
"Error [case 1]: incorrect results!"
));
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 1]: incorrect results!"
));
}
}
TEST
(
ReferenceConvolutionFWD
,
Conv3DGNCDHWStridesDilations
)
TEST
(
ReferenceConvolutionFWD
,
Conv3DGNCDHWStridesDilations
)
...
@@ -388,5 +388,5 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
...
@@ -388,5 +388,5 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
ref_dims
,
ref_dims
,
"Error [case 2]: wrong output tensor dimensions!"
));
"Error [case 2]: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 2]: incorrect results!"
,
1e-4
f
,
1e-6
f
));
out_tensor
,
ref_data
,
"Error [case 2]: incorrect results!"
,
1e-4
f
,
1e-6
f
));
}
}
test/softmax/CMakeLists.txt
View file @
24af0144
add_custom_target
(
test_softmax
)
add_custom_target
(
test_softmax
)
add_gtest_executable
(
test_softmax_
fp32
test_softmax_
fp32
.cpp
)
add_gtest_executable
(
test_softmax_
rank3
test_softmax_
rank3
.cpp
)
add_gtest_executable
(
test_softmax_
fp16
test_softmax_
fp16
.cpp
)
add_gtest_executable
(
test_softmax_
rank4
test_softmax_
rank4
.cpp
)
add_gtest_executable
(
test_softmax_int
8
test_softmax_int
8
.cpp
)
add_gtest_executable
(
test_softmax_int
erface
test_softmax_int
erface
.cpp
)
target_link_libraries
(
test_softmax_
fp32
PRIVATE utility
)
target_link_libraries
(
test_softmax_
rank3
PRIVATE utility
device_softmax_instance
)
target_link_libraries
(
test_softmax_
fp16
PRIVATE utility
)
target_link_libraries
(
test_softmax_
rank4
PRIVATE utility
device_softmax_instance
)
target_link_libraries
(
test_softmax_int
8
PRIVATE utility
)
target_link_libraries
(
test_softmax_int
erface
PRIVATE utility
device_softmax_instance
)
add_dependencies
(
test_softmax test_softmax_
fp32
)
add_dependencies
(
test_softmax test_softmax_
rank3
)
add_dependencies
(
test_softmax test_softmax_
fp16
)
add_dependencies
(
test_softmax test_softmax_
rank4
)
add_dependencies
(
test_softmax test_softmax_int
8
)
add_dependencies
(
test_softmax test_softmax_int
erface
)
test/softmax/test_softmax_fp16.cpp
deleted
100644 → 0
View file @
961f5e9e
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template
<
ck
::
index_t
N
>
using
I
=
ck
::
Number
<
N
>
;
template
<
typename
Tuple
>
class
TestSoftmaxFP16
:
public
ck
::
TestSoftmax
<
Tuple
>
{
};
// clang-format off
// One tuple per kernel configuration the FP16 suite is instantiated with.
// Column order:
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize
using KernelTypes = ::testing::Types<
    std::tuple<ck::half_t, float, float,      I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<8>,  I<1>, I<8>, I<4>>, // mixed precision
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<8>, I<32>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<4>, I<64>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>
    >;
// clang-format on

// Register the fixture with the configuration list above.
TYPED_TEST_SUITE(TestSoftmaxFP16, KernelTypes);

// Runs the inherited softmax check once per configuration.
TYPED_TEST(TestSoftmaxFP16, Test_FP16) { this->Run(); }
test/softmax/test_softmax_fp32.cpp
deleted
100644 → 0
View file @
961f5e9e
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template
<
ck
::
index_t
N
>
using
I
=
ck
::
Number
<
N
>
;
template
<
typename
Tuple
>
class
TestSoftmaxFP32
:
public
ck
::
TestSoftmax
<
Tuple
>
{
};
// clang-format off
// One tuple per kernel configuration the FP32 suite is instantiated with.
// Column order:
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize
using KernelTypes = ::testing::Types<
    std::tuple<float, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<4>, I<8>>, // mixed precision
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<8>, I<32>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<4>, I<64>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>
    >;
// clang-format on

// Register the fixture with the configuration list above.
TYPED_TEST_SUITE(TestSoftmaxFP32, KernelTypes);

// Runs the inherited softmax check once per configuration.
TYPED_TEST(TestSoftmaxFP32, Test_FP32) { this->Run(); }
test/softmax/test_softmax_int8.cpp
deleted
100644 → 0
View file @
961f5e9e
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template
<
ck
::
index_t
N
>
using
I
=
ck
::
Number
<
N
>
;
template
<
typename
Tuple
>
class
TestSoftmaxINT8
:
public
ck
::
TestSoftmax
<
Tuple
>
{
};
// clang-format off
// One tuple per kernel configuration the INT8 suite is instantiated with.
// Column order:
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize
using KernelTypes = ::testing::Types<
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<8>, I<32>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<4>, I<64>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>
    >;
// clang-format on

// Register the fixture with the configuration list above.
TYPED_TEST_SUITE(TestSoftmaxINT8, KernelTypes);

// Runs the inherited softmax check once per configuration.
TYPED_TEST(TestSoftmaxINT8, Test_INT8) { this->Run(); }
test/softmax/test_softmax_interface.cpp
0 → 100644
View file @
24af0144
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <stdexcept>
#include <vector>
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
// Fixture for the softmax interface (argument validation) tests.
class TestSoftmaxInterface : public ::testing::Test
{
    protected:
    // Device softmax wrapper in which only the tensor rank and the number of
    // reduced dimensions vary; the other eight template arguments are fixed.
    // NOTE(review): the fixed arguments presumably follow the order BlockSize,
    // MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize,
    // InSrcVectorDim, InSrcVectorSize, OutDstVectorSize -- confirm against
    // DeviceSoftmaxInstanceWrapper.
    template <ck::index_t TensorRank, ck::index_t ReduceDimCount>
    using SoftmaxInstance = ck::
        DeviceSoftmaxInstanceWrapper<TensorRank, ReduceDimCount, 256, 1, 256, 1, 8, 1, 8, 8>;
};
// IsSupported() must throw std::runtime_error for every malformed reduce-dims
// list below; lengths and strides describe a valid packed rank-3 tensor.
TEST_F(TestSoftmaxInterface, IncorrectReduceDims)
{
    const std::vector<ck::index_t> lengths = {2, 128, 1536};
    const std::vector<ck::index_t> strides = {128 * 1536, 1536, 1};

    // Instance declared with a single reduce dimension:
    // a negative index, an index equal to the rank, two entries, no entries.
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {-1})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {3})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {0, 1})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {})),
                 std::runtime_error);

    // Instance declared with two reduce dimensions:
    // a negative entry, an entry beyond the rank, only one entry.
    EXPECT_THROW((SoftmaxInstance<3, 2>{}.IsSupported(lengths, strides, {2, -1})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 2>{}.IsSupported(lengths, strides, {2, 4})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 2>{}.IsSupported(lengths, strides, {2})),
                 std::runtime_error);
}
// IsSupported() must throw std::runtime_error when the number of lengths does
// not match the instance rank (3); strides and reduce_dims stay well-formed.
TEST_F(TestSoftmaxInterface, IncorrectLengthsSize)
{
    // Two lengths for a rank-3 instance. This local was previously declared but
    // never used (the first check repeated the literal); it now feeds that check.
    std::vector<ck::index_t> lengths{128, 1536};
    std::vector<ck::index_t> strides{128 * 1536, 1536, 1};
    std::vector<ck::index_t> reduce_dims{2};

    // Too few lengths.
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, reduce_dims)),
                 std::runtime_error);
    // No lengths at all.
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported({}, strides, reduce_dims)),
                 std::runtime_error);
    // Too many lengths.
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported({1, 8, 128, 1536}, strides, reduce_dims)),
                 std::runtime_error);
}
// IsSupported() must throw std::runtime_error when the number of strides does
// not match the instance rank (3); lengths and reduce_dims stay well-formed.
TEST_F(TestSoftmaxInterface, IncorrectStridesSize)
{
    const std::vector<ck::index_t> lengths     = {2, 128, 1536};
    const std::vector<ck::index_t> reduce_dims = {2};

    // Two strides, zero strides, then four strides.
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, {1536, 1}, reduce_dims)),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, {}, reduce_dims)),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, {1, 2, 3, 4}, reduce_dims)),
                 std::runtime_error);
}
// For each instance/shape pair below IsSupported() must return false rather
// than throw: the arguments are well-formed but the instance rejects them.
// NOTE(review): wrapper arguments are presumably <Rank, NumReduceDim, BlockSize,
// MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize,
// InSrcVectorDim, InSrcVectorSize, OutDstVectorSize> -- confirm.
TEST_F(TestSoftmaxInterface, UnsupportedLengths)
{
    // Rank 3, one reduced dimension; every call reduces dimension 2.
    using SoftmaxInstance1 =
        ck::DeviceSoftmaxInstanceWrapper<3, 1, 256, 1, 256, 1, 8, 1, 8, 4>;
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 128, 1500}, {128 * 1500, 1500, 1}, {2}));
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 127, 1536}, {127 * 1536, 1536, 1}, {2}));
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 128, 1537}, {128 * 1537, 1537, 1}, {2}));

    // Rank 3 with all three dimensions reduced.
    using SoftmaxInstance2 =
        ck::DeviceSoftmaxInstanceWrapper<3, 3, 256, 8, 32, 8, 8, 0, 8, 4>;
    EXPECT_FALSE(
        SoftmaxInstance2{}.IsSupported({2, 128, 1536}, {128 * 1536, 1536, 1}, {0, 1, 2}));

    // Rank 3, one reduced dimension; the first call reduces dimension 2 and the
    // remaining calls reduce the middle dimension 1.
    using SoftmaxInstance3 =
        ck::DeviceSoftmaxInstanceWrapper<3, 1, 256, 8, 32, 8, 8, 0, 4, 8>;
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 128, 1536}, {128 * 1536, 1536, 1}, {2}));
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 128, 1537}, {128 * 1537, 1537, 1}, {1}));
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 128, 1540}, {128 * 1540, 1540, 1}, {1}));
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 127, 1536}, {127 * 1536, 1536, 1}, {1}));
}
// A well-formed request that this particular instance configuration cannot
// serve must be reported as unsupported (false), not raised as an error.
TEST_F(TestSoftmaxInterface, UnsupportedInstance)
{
    // Instance with InSrcVectorDim = 1: it cannot reduce a non-innermost
    // dimension when the in/out vector size is not 1. The call reduces dim 0.
    using SoftmaxInstance1 =
        ck::DeviceSoftmaxInstanceWrapper<3, 1, 256, 8, 32, 1, 8, 1, 8, 8>;
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 128, 1024}, {128 * 1024, 1024, 1}, {0}));
}
test/softmax/test_softmax_rank3.cpp
0 → 100644
View file @
24af0144
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm>
#include <stdexcept>
#include <vector>
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template
<
ck
::
index_t
N
>
using
I
=
ck
::
Number
<
N
>
;
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
using
I8
=
int8_t
;
template
<
typename
Tuple
>
class
TestSoftmax
:
public
ck
::
TestSoftmax
<
Tuple
>
{
};
// clang-format off
// Data-type combinations for the rank-3 suite. Column order:
// InDataType, AccDataType, OutDataType, Rank
using KernelTypes = ::testing::Types<
    std::tuple<F16, F32, F16, I<3>>,
    std::tuple<F32, F32, F32, I<3>>,
    std::tuple<I8,  F32, I8,  I<3>>
    >;
// clang-format on

// Register the fixture with the type list above.
TYPED_TEST_SUITE(TestSoftmax, KernelTypes);
#include "test_softmax_ut_cases.inc"
test/softmax/test_softmax_rank4.cpp
0 → 100644
View file @
24af0144
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <algorithm>
#include <stdexcept>
#include <vector>
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template
<
ck
::
index_t
N
>
using
I
=
ck
::
Number
<
N
>
;
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
using
I8
=
int8_t
;
template
<
typename
Tuple
>
class
TestSoftmax
:
public
ck
::
TestSoftmax
<
Tuple
>
{
};
// clang-format off
// Data-type combinations for the rank-4 suite. Column order:
// InDataType, AccDataType, OutDataType, Rank
using KernelTypes = ::testing::Types<
    std::tuple<F16, F32, F16, I<4>>,
    std::tuple<F32, F32, F32, I<4>>,
    std::tuple<I8,  F32, I8,  I<4>>
    >;
// clang-format on

// Register the fixture with the type list above.
TYPED_TEST_SUITE(TestSoftmax, KernelTypes);
#include "test_softmax_ut_cases.inc"
test/softmax/test_softmax_ut_cases.inc
0 → 100644
View file @
24af0144
#pragma once
// Softmax over a single dimension: the one with the highest index (Rank - 1).
TYPED_TEST(TestSoftmax, ReduceOutermostDim)
{
    const std::vector<ck::index_t> last_dim_only{this->Rank - 1};
    this->Run(last_dim_only);
}
// Softmax over one dimension at a time, for every dimension except the last
// (indices 0 .. Rank - 2).
TYPED_TEST(TestSoftmax, ReduceMiddleDim)
{
    const int last_dim = this->Rank - 1;

    int axis = 0;
    while(axis < last_dim)
    {
        const std::vector<ck::index_t> single_axis{axis};
        this->Run(single_axis);
        ++axis;
    }
}
// Softmax over two dimensions at once: each dimension 0 .. Rank - 2 paired
// with the last dimension (Rank - 1).
TYPED_TEST(TestSoftmax, ReduceMultipleDimsWithOutermost)
{
    const int last_dim = this->Rank - 1;

    int axis = 0;
    while(axis < last_dim)
    {
        const std::vector<ck::index_t> axis_and_last{axis, this->Rank - 1};
        this->Run(axis_and_last);
        ++axis;
    }
}
// Softmax over several dimensions that exclude the last one. Which
// combinations run depends on the rank of the fixture under test.
TYPED_TEST(TestSoftmax, ReduceMultipleMiddleDims)
{
    // Collect the applicable combinations first, then run them in order.
    std::vector<std::vector<ck::index_t>> dim_sets;

    if(this->Rank >= 3)
    {
        dim_sets.push_back(std::vector<ck::index_t>{0, 1});
    }

    if(this->Rank >= 4)
    {
        dim_sets.push_back(std::vector<ck::index_t>{0, 2});
        dim_sets.push_back(std::vector<ck::index_t>{0, 1, 2});
    }

    for(const auto& dims : dim_sets)
    {
        this->Run(dims);
    }
}
// Softmax over every dimension of the tensor: reduce dims are 0, 1, ..., Rank - 1.
TYPED_TEST(TestSoftmax, ReduceAllDims)
{
    // Size the list to Rank, then fill it with consecutive indices from 0.
    std::vector<ck::index_t> every_dim(this->Rank);
    std::iota(every_dim.begin(), every_dim.end(), 0);

    this->Run(every_dim);
}
// Replaces the fixture's default shapes with one shape containing odd extents
// (3 and 63), then reduces the last and the second-to-last dimension in turn.
TYPED_TEST(TestSoftmax, ReduceOddLengths)
{
    // Pick the shape that matches the rank under test; rank 4 gets a leading 1.
    if(this->Rank >= 4)
    {
        this->in_lengths_ = {{1, 3, 63, 1032}};
    }
    else
    {
        this->in_lengths_ = {{3, 63, 1032}};
    }

    this->Run({this->Rank - 1});
    this->Run({this->Rank - 2});
}
test/softmax/test_softmax_util.hpp
View file @
24af0144
...
@@ -3,19 +3,17 @@
...
@@ -3,19 +3,17 @@
#pragma once
#pragma once
#include <string>
#include <sstream>
#include <tuple>
#include <vector>
#include <vector>
#include <iostream>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/utility/number.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "include/ck/utility/data_type.hpp"
#include "ck/library/utility/check_err.hpp"
#include "profiler/include/profile_softmax_impl.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
namespace
ck
{
namespace
ck
{
...
@@ -35,126 +33,110 @@ template <typename Tuple>
...
@@ -35,126 +33,110 @@ template <typename Tuple>
class
TestSoftmax
:
public
::
testing
::
Test
class
TestSoftmax
:
public
::
testing
::
Test
{
{
protected:
protected:
using
InDataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
InDataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
OutDataType
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
OutDataType
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
static
constexpr
index_t
Rank
=
std
::
tuple_element_t
<
3
,
Tuple
>
{}.
value
;
static
constexpr
index_t
Rank
=
std
::
tuple_element_t
<
3
,
Tuple
>
{}.
value
;
static
constexpr
index_t
NumReduceDim
=
std
::
tuple_element_t
<
4
,
Tuple
>
{}.
value
;
static
constexpr
index_t
BlockSize
=
std
::
tuple_element_t
<
5
,
Tuple
>
{}.
value
;
public:
static
constexpr
index_t
MThreadClusterSize
=
std
::
tuple_element_t
<
6
,
Tuple
>
{}.
value
;
std
::
vector
<
std
::
vector
<
index_t
>>
in_lengths_
=
{{
2
,
128
,
1024
},
{
4
,
16
,
8448
},
{
128
,
128
,
64
}};
static
constexpr
index_t
KThreadClusterSize
=
std
::
tuple_element_t
<
7
,
Tuple
>
{}.
value
;
std
::
vector
<
std
::
vector
<
AccDataType
>>
scales_
=
{{
2
,
0
},
{
0
,
2
},
{
2
,
2
}};
static
constexpr
index_t
MThreadSliceSize
=
std
::
tuple_element_t
<
8
,
Tuple
>
{}.
value
;
bool
bench_
=
false
;
// measure kernel performance
static
constexpr
index_t
KThreadSliceSize
=
std
::
tuple_element_t
<
9
,
Tuple
>
{}.
value
;
bool
verify_
=
true
;
static
constexpr
index_t
InSrcVectorDim
=
std
::
tuple_element_t
<
10
,
Tuple
>
{}.
value
;
static
constexpr
index_t
InSrcVectorSize
=
std
::
tuple_element_t
<
11
,
Tuple
>
{}.
value
;
void
SetUp
()
override
static
constexpr
index_t
OutDstVectorSize
=
std
::
tuple_element_t
<
12
,
Tuple
>
{}.
value
;
using
ReferenceInstance
=
tensor_operation
::
host
::
ReferenceSoftmax
<
InDataType
,
OutDataType
,
AccDataType
>
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
DeviceInstance
=
tensor_operation
::
device
::
DeviceSoftmaxImpl
<
InDataType
,
AccDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
Rank
,
NumReduceDim
,
BlockSize
,
MThreadClusterSize
,
KThreadClusterSize
,
MThreadSliceSize
,
KThreadSliceSize
,
InSrcVectorDim
,
InSrcVectorSize
,
OutDstVectorSize
>
;
TestSoftmax
()
:
ref_instance_invoker_
(
ReferenceInstance
{}.
MakeInvoker
())
{}
void
RunSingle
(
std
::
vector
<
index_t
>
in_length
,
AccDataType
alpha
,
AccDataType
beta
)
{
{
std
::
vector
<
index_t
>
reduce_dims
(
NumReduceDim
);
if
constexpr
(
Rank
==
4
)
std
::
iota
(
reduce_dims
.
begin
(),
reduce_dims
.
end
(),
Rank
-
NumReduceDim
);
Tensor
<
InDataType
>
in
(
in_length
);
Tensor
<
OutDataType
>
out
(
in_length
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
5
,
5
});
out
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
Tensor
<
OutDataType
>
out_ref
(
out
);
DeviceMem
in_dev
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
out_dev
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpaceSize
());
in_dev
.
ToDevice
(
in
.
mData
.
data
());
out_dev
.
ToDevice
(
out
.
mData
.
data
());
std
::
vector
<
index_t
>
i_in_lengths
(
in
.
mDesc
.
GetLengths
().
begin
(),
in
.
mDesc
.
GetLengths
().
end
());
std
::
vector
<
index_t
>
i_in_strides
(
in
.
mDesc
.
GetStrides
().
begin
(),
in
.
mDesc
.
GetStrides
().
end
());
auto
device_instance
=
DeviceInstance
{};
auto
argument_ptr
=
device_instance
.
MakeArgumentPointer
(
i_in_lengths
,
i_in_strides
,
reduce_dims
,
&
alpha
,
&
beta
,
in_dev
.
GetDeviceBuffer
(),
out_dev
.
GetDeviceBuffer
(),
PassThrough
{},
PassThrough
{});
if
(
!
device_instance
.
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
// std::cout << "Skipped due to unsupported argument: "
in_lengths_
=
std
::
vector
<
std
::
vector
<
index_t
>>
{
// << "input lengths = [" << serialize_range(in_length) << "], "
{
1
,
2
,
128
,
1024
},
{
2
,
4
,
16
,
8448
},
{
1
,
128
,
128
,
64
}};
// << "scaler = [" << alpha << ", " << beta << "]." << std::endl;
return
;
}
}
}
auto
invoker_ptr
=
device_instance
.
MakeInvokerPointer
();
void
RunSingle
(
std
::
vector
<
index_t
>
in_length
,
invoker_ptr
->
Run
(
argument_ptr
.
get
());
std
::
vector
<
index_t
>
reduce_dims
,
AccDataType
alpha
,
ref_instance_invoker_
.
Run
({
in
,
out_ref
,
alpha
,
beta
,
reduce_dims
});
AccDataType
beta
)
{
out_dev
.
FromDevice
(
out
.
mData
.
data
());
int
init_method
=
1
;
// integer value initialization
bool
log
=
false
;
bool
pass
;
std
::
vector
<
ck
::
index_t
>
strides
;
// intentionally empty, to get packed layout.
bool
pass
=
ck
::
profiler
::
profile_softmax_impl
<
InDataType
,
AccDataType
,
OutDataType
,
Rank
>
(
if
(
std
::
is_same
<
InDataType
,
int8_t
>::
value
)
verify_
,
init_method
,
log
,
bench_
,
in_length
,
strides
,
reduce_dims
,
alpha
,
beta
);
{
EXPECT_TRUE
(
pass
);
EXPECT_TRUE
(
pass
=
ck
::
utils
::
check_err
(
}
out
.
mData
,
out_ref
.
mData
,
"Error: Incorrect results!"
,
0
,
1
));
}
else
{
EXPECT_TRUE
(
pass
=
ck
::
utils
::
check_err
(
out
.
mData
,
out_ref
.
mData
));
}
if
(
!
pass
)
void
Run
(
std
::
vector
<
index_t
>
reduce_dims
=
{})
{
if
(
reduce_dims
.
empty
())
{
{
FAIL
()
<<
"Failure in input lengths = ["
<<
serialize_range
(
in_length
)
<<
"], "
reduce_dims
.
push_back
(
Rank
-
1
);
<<
"scaler = ["
<<
alpha
<<
", "
<<
beta
<<
"]."
;
}
}
}
void
Run
()
{
for
(
auto
in_length
:
this
->
in_lengths_
)
for
(
auto
in_length
:
this
->
in_lengths_
)
{
{
for
(
auto
scale
:
this
->
scales_
)
for
(
auto
scale
:
this
->
scales_
)
{
{
this
->
RunSingle
(
in_length
,
scale
[
0
],
scale
[
1
]);
this
->
RunSingle
(
in_length
,
reduce_dims
,
scale
[
0
],
scale
[
1
]);
}
}
}
}
}
}
};
std
::
vector
<
std
::
vector
<
index_t
>>
in_lengths_
=
{
template
<
index_t
Rank
,
{
1
,
8
,
128
},
{
2
,
128
,
1024
},
{
3
,
9
,
1032
},
{
4
,
4
,
2048
},
{
8
,
1
,
8192
}};
index_t
NumReduceDim
,
std
::
vector
<
std
::
vector
<
AccDataType
>>
scales_
=
{{
1
,
0
},
{
1
,
1
},
{
0
,
1
},
{
2
,
2
}};
index_t
BlockSize
,
index_t
MThreadClusterSize
,
typename
ReferenceInstance
::
Invoker
ref_instance_invoker_
;
index_t
KThreadClusterSize
,
index_t
MThreadSliceSize
,
index_t
KThreadSliceSize
,
index_t
InSrcVectorDim
,
index_t
InSrcVectorSize
,
index_t
OutDstVectorSize
>
struct
DeviceSoftmaxInstanceWrapper
{
using
F16
=
half_t
;
using
F32
=
float
;
using
Pass
=
tensor_operation
::
element_wise
::
PassThrough
;
using
InDataType
=
F16
;
using
AccDataType
=
F32
;
using
OutDataType
=
F16
;
using
InElementOp
=
Pass
;
using
AccElementOp
=
Pass
;
using
DeviceSoftmaxInstance
=
tensor_operation
::
device
::
DeviceSoftmaxImpl
<
InDataType
,
AccDataType
,
OutDataType
,
InElementOp
,
AccElementOp
,
Rank
,
NumReduceDim
,
BlockSize
,
MThreadClusterSize
,
KThreadClusterSize
,
MThreadSliceSize
,
KThreadSliceSize
,
InSrcVectorDim
,
InSrcVectorSize
,
OutDstVectorSize
>
;
bool
IsSupported
(
const
std
::
vector
<
index_t
>
in_lengths
,
const
std
::
vector
<
index_t
>
in_strides
,
const
std
::
vector
<
index_t
>
reduce_dims
)
const
{
auto
softmax
=
DeviceSoftmaxInstance
{};
auto
argument
=
softmax
.
MakeArgument
(
in_lengths
,
in_strides
,
reduce_dims
,
1
,
// alpha
1
,
// beta
nullptr
,
// in_dev
nullptr
,
// in_out
Pass
{},
// in elementwise op
Pass
{});
// acc elementwise op
return
softmax
.
IsSupportedArgument
(
argument
);
}
};
};
}
// namespace ck
}
// namespace ck
test/space_filling_curve/space_filling_curve.cpp
View file @
24af0144
...
@@ -12,28 +12,91 @@
...
@@ -12,28 +12,91 @@
using
namespace
ck
;
using
namespace
ck
;
void
traverse_using_space_filling_curve
();
void
traverse_using_space_filling_curve_linear
();
void
traverse_using_space_filling_curve_snakecurved
();
int
main
(
int
argc
,
char
**
argv
)
int
main
(
int
argc
,
char
**
argv
)
{
{
(
void
)
argc
;
(
void
)
argc
;
(
void
)
argv
;
(
void
)
argv
;
traverse_using_space_filling_curve
();
traverse_using_space_filling_curve_linear
();
traverse_using_space_filling_curve_snakecurved
();
return
0
;
return
0
;
}
}
void
traverse_using_space_filling_curve
()
void
traverse_using_space_filling_curve
_linear
()
{
{
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
using
TensorLengths
=
Sequence
<
16
,
10
,
9
>
;
using
TensorLengths
=
Sequence
<
3
,
2
,
2
>
;
using
DimAccessOrder
=
Sequence
<
2
,
0
,
1
>
;
using
DimAccessOrder
=
Sequence
<
2
,
0
,
1
>
;
using
ScalarsPerAccess
=
Sequence
<
4
,
2
,
3
>
;
using
ScalarsPerAccess
=
Sequence
<
1
,
1
,
1
>
;
using
SpaceFillingCurve
=
SpaceFillingCurve
<
TensorLengths
,
DimAccessOrder
,
ScalarsPerAccess
>
;
using
SpaceFillingCurve
=
SpaceFillingCurve
<
TensorLengths
,
DimAccessOrder
,
ScalarsPerAccess
,
false
>
;
constexpr
auto
expected
=
make_tuple
(
make_tuple
(
0
,
0
,
0
),
make_tuple
(
0
,
1
,
0
),
make_tuple
(
1
,
0
,
0
),
make_tuple
(
1
,
1
,
0
),
make_tuple
(
2
,
0
,
0
),
make_tuple
(
2
,
1
,
0
),
make_tuple
(
0
,
0
,
1
),
make_tuple
(
0
,
1
,
1
),
make_tuple
(
1
,
0
,
1
),
make_tuple
(
1
,
1
,
1
),
make_tuple
(
2
,
0
,
1
),
make_tuple
(
2
,
1
,
1
));
constexpr
index_t
num_access
=
SpaceFillingCurve
::
GetNumOfAccess
();
static_assert
(
num_access
==
reduce_on_sequence
(
TensorLengths
{}
/
ScalarsPerAccess
{},
math
::
multiplies
{},
Number
<
1
>
{}));
static_for
<
1
,
num_access
,
1
>
{}([
&
](
auto
i
)
{
constexpr
auto
idx_curr
=
SpaceFillingCurve
::
GetIndex
(
i
);
static_assert
(
idx_curr
[
I0
]
==
expected
[
i
][
I0
]);
static_assert
(
idx_curr
[
I1
]
==
expected
[
i
][
I1
]);
static_assert
(
idx_curr
[
I2
]
==
expected
[
i
][
I2
]);
constexpr
auto
backward_step
=
SpaceFillingCurve
::
GetBackwardStep
(
i
);
constexpr
auto
expected_step
=
expected
[
i
-
I1
]
-
expected
[
i
];
static_assert
(
backward_step
[
I0
]
==
expected_step
[
I0
]);
static_assert
(
backward_step
[
I1
]
==
expected_step
[
I1
]);
static_assert
(
backward_step
[
I2
]
==
expected_step
[
I2
]);
});
static_for
<
0
,
num_access
-
1
,
1
>
{}([
&
](
auto
i
)
{
constexpr
auto
idx_curr
=
SpaceFillingCurve
::
GetIndex
(
i
);
static_assert
(
idx_curr
[
I0
]
==
expected
[
i
][
I0
]);
static_assert
(
idx_curr
[
I1
]
==
expected
[
i
][
I1
]);
static_assert
(
idx_curr
[
I2
]
==
expected
[
i
][
I2
]);
constexpr
auto
forward_step
=
SpaceFillingCurve
::
GetForwardStep
(
i
);
constexpr
auto
expected_step
=
expected
[
i
+
I1
]
-
expected
[
i
];
static_assert
(
forward_step
[
I0
]
==
expected_step
[
I0
]);
static_assert
(
forward_step
[
I1
]
==
expected_step
[
I1
]);
static_assert
(
forward_step
[
I2
]
==
expected_step
[
I2
]);
});
}
void
traverse_using_space_filling_curve_snakecurved
()
{
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
using
TensorLengths
=
Sequence
<
16
,
10
,
9
>
;
using
DimAccessOrder
=
Sequence
<
2
,
0
,
1
>
;
using
ScalarsPerAccess
=
Sequence
<
4
,
2
,
3
>
;
using
SpaceFillingCurve
=
SpaceFillingCurve
<
TensorLengths
,
DimAccessOrder
,
ScalarsPerAccess
,
true
>
;
constexpr
auto
expected
=
make_tuple
(
make_tuple
(
0
,
0
,
0
),
constexpr
auto
expected
=
make_tuple
(
make_tuple
(
0
,
0
,
0
),
make_tuple
(
0
,
2
,
0
),
make_tuple
(
0
,
2
,
0
),
...
...
Prev
1
…
37
38
39
40
41
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment