gaoqiong / composable_kernel / Commits

Commit 05ee41c3, authored Nov 30, 2022 by Rosty Geyyer

    Merge branch 'develop' into lwpck-471

Parents: 37116c98, ad541ad6

Showing 16 changed files with 400 additions and 449 deletions (+400 -449)
Files changed:

    test/normalization/CMakeLists.txt                +4    -4
    test/normalization/test_groupnorm_fp16.cpp       +1    -1
    test/normalization/test_groupnorm_fp32.cpp       +1    -1
    test/normalization/test_layernorm2d_fp16.cpp     +34   -18
    test/normalization/test_layernorm2d_fp32.cpp     +34   -18
    test/normalization/test_layernorm2d_util.hpp     +0    -179
    test/reference_conv_fwd/reference_conv_fwd.cpp   +9    -9
    test/softmax/CMakeLists.txt                      +9    -9
    test/softmax/test_softmax_fp16.cpp               +0    -34
    test/softmax/test_softmax_fp32.cpp               +0    -34
    test/softmax/test_softmax_int8.cpp               +0    -30
    test/softmax/test_softmax_interface.cpp          +86   -0
    test/softmax/test_softmax_rank3.cpp              +34   -0
    test/softmax/test_softmax_rank4.cpp              +34   -0
    test/softmax/test_softmax_ut_cases.inc           +60   -0
    test/softmax/test_softmax_util.hpp               +94   -112
test/normalization/CMakeLists.txt (+4 -4)

@@ -3,10 +3,11 @@ add_custom_target(test_layernorm)
 add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
 add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
 add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
 add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
-target_link_libraries(test_layernorm2d_fp32 PRIVATE utility)
-target_link_libraries(test_layernorm2d_fp16 PRIVATE utility)
+target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
+target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
 target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
 target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
...
@@ -14,4 +15,3 @@ add_dependencies(test_layernorm test_layernorm2d_fp32)
 add_dependencies(test_layernorm test_layernorm2d_fp16)
 add_dependencies(test_layernorm test_groupnorm_fp16)
 add_dependencies(test_layernorm test_groupnorm_fp32)
test/normalization/test_groupnorm_fp16.cpp (+1 -1)

@@ -20,7 +20,7 @@ class TestGroupnorm : public ::testing::Test
     void Run()
     {
-        // N, H, W, G, C
+        // [N, H, W, G, C], reduce H, W, C
         std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
                                                          {1, 2, 3, 4, 5},
                                                          {256, 9, 9, 9, 9},
...
test/normalization/test_groupnorm_fp32.cpp (+1 -1)

@@ -20,7 +20,7 @@ class TestGroupnorm : public ::testing::Test
     void Run()
     {
-        // N, H, W, G, C
+        // [N, H, W, G, C], reduce H, W, C
         std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
                                                          {1, 2, 3, 4, 5},
                                                          {256, 9, 9, 9, 9},
...
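The updated comment spells out what each test case is: an [N, H, W, G, C] tensor normalized within each (sample, group) pair over the H, W, and C axes. For orientation, here is a minimal sketch of that reduction semantics in plain C++; the flat NHWGC indexing, naming, and epsilon are illustrative assumptions, not the library's implementation.

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical naive groupnorm over a flat [N, H, W, G, C] buffer: for every
// (n, g) pair, mean/variance are taken across H, W and C, matching the
// "[N, H, W, G, C], reduce H, W, C" comment in the test.
void naive_groupnorm(const std::vector<float>& x, std::vector<float>& y,
                     std::size_t N, std::size_t H, std::size_t W,
                     std::size_t G, std::size_t C, float epsilon = 1e-4f)
{
    auto at = [&](std::size_t n, std::size_t h, std::size_t w, std::size_t g,
                  std::size_t c) { return (((n * H + h) * W + w) * G + g) * C + c; };

    for(std::size_t n = 0; n < N; ++n)
        for(std::size_t g = 0; g < G; ++g)
        {
            const double count = double(H) * W * C;
            double sum = 0.0, sq_sum = 0.0;
            for(std::size_t h = 0; h < H; ++h)
                for(std::size_t w = 0; w < W; ++w)
                    for(std::size_t c = 0; c < C; ++c)
                    {
                        const double v = x[at(n, h, w, g, c)];
                        sum += v;
                        sq_sum += v * v;
                    }
            const double mean    = sum / count;
            const double var     = sq_sum / count - mean * mean;
            const double inv_std = 1.0 / std::sqrt(var + epsilon);
            for(std::size_t h = 0; h < H; ++h)
                for(std::size_t w = 0; w < W; ++w)
                    for(std::size_t c = 0; c < C; ++c)
                        y[at(n, h, w, g, c)] =
                            float((x[at(n, h, w, g, c)] - mean) * inv_std);
        }
}
```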
test/normalization/test_layernorm2d_fp16.cpp (+34 -18)

@@ -2,28 +2,44 @@
 // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

 #include "gtest/gtest.h"
-#include "test_layernorm2d_util.hpp"
+#include "profiler/include/profile_layernorm_impl.hpp"

-template <ck::index_t N>
-using I = ck::Number<N>;
+using F16 = ck::half_t;
+using F32 = float;
+using ck::index_t;

 template <typename Tuple>
-class TestLayernorm2dFP16 : public ck::TestLayernorm2d<Tuple>
+class TestLayernorm2d : public ::testing::Test
 {
     protected:
+    using XDataType     = std::tuple_element_t<0, Tuple>;
+    using GammaDataType = std::tuple_element_t<1, Tuple>;
+    using BetaDataType  = std::tuple_element_t<2, Tuple>;
+    using AccDataType   = std::tuple_element_t<3, Tuple>;
+    using YDataType     = std::tuple_element_t<4, Tuple>;
+
+    void Run()
+    {
+        // [N, D], reduce D
+        std::vector<std::vector<ck::index_t>> lengths = {
+            {4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
+
+        for(auto length : lengths)
+        {
+            bool success = ck::profiler::profile_layernorm_impl<XDataType,
+                                                                GammaDataType,
+                                                                BetaDataType,
+                                                                AccDataType,
+                                                                YDataType,
+                                                                2>(true, 2, false, false, length);
+            EXPECT_TRUE(success);
+        }
+    }
 };

 // clang-format off
 using KernelTypes = ::testing::Types<
-    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize>
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<8>, I<32>,  I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<4>, I<64>,  I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>>;
+    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
+    std::tuple<F16, F16, F16, F32, F16>>;
 // clang-format on

-TYPED_TEST_SUITE(TestLayernorm2dFP16, KernelTypes);
-TYPED_TEST(TestLayernorm2dFP16, Test_FP16) { this->Run(); }
+TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);
+TYPED_TEST(TestLayernorm2d, Test_FP16) { this->Run(); }
test/normalization/test_layernorm2d_fp32.cpp (+34 -18)

@@ -2,28 +2,44 @@
 // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

 #include "gtest/gtest.h"
-#include "test_layernorm2d_util.hpp"
+#include "profiler/include/profile_layernorm_impl.hpp"

-template <ck::index_t N>
-using I = ck::Number<N>;
+using F16 = ck::half_t;
+using F32 = float;
+using ck::index_t;

 template <typename Tuple>
-class TestLayernorm2dFP32 : public ck::TestLayernorm2d<Tuple>
+class TestLayernorm2d : public ::testing::Test
 {
     protected:
+    using XDataType     = std::tuple_element_t<0, Tuple>;
+    using GammaDataType = std::tuple_element_t<1, Tuple>;
+    using BetaDataType  = std::tuple_element_t<2, Tuple>;
+    using AccDataType   = std::tuple_element_t<3, Tuple>;
+    using YDataType     = std::tuple_element_t<4, Tuple>;
+
+    void Run()
+    {
+        // [N, D], reduce D
+        std::vector<std::vector<ck::index_t>> lengths = {
+            {4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
+
+        for(auto length : lengths)
+        {
+            bool success = ck::profiler::profile_layernorm_impl<XDataType,
+                                                                GammaDataType,
+                                                                BetaDataType,
+                                                                AccDataType,
+                                                                YDataType,
+                                                                2>(true, 2, false, false, length);
+            EXPECT_TRUE(success);
+        }
+    }
 };

 // clang-format off
 using KernelTypes = ::testing::Types<
-    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize>
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<8>, I<32>,  I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<4>, I<64>,  I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>,
-    std::tuple<float, float, float, float, float, I<2>, I<1>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<4>, I<1>, I<4>, I<1>, I<4>, I<4>>>;
+    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
+    std::tuple<F32, F32, F32, F32, F32>>;
 // clang-format on

-TYPED_TEST_SUITE(TestLayernorm2dFP32, KernelTypes);
-TYPED_TEST(TestLayernorm2dFP32, Test_FP32) { this->Run(); }
+TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);
+TYPED_TEST(TestLayernorm2d, Test_FP32) { this->Run(); }
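Both layernorm tests now funnel through ck::profiler::profile_layernorm_impl over [N, D] problems reduced along D. As a reference point for what is being verified, here is a naive 2D layernorm with gamma/beta in plain C++; this is an illustrative sketch of the math, not CK's reference implementation (the epsilon mirrors the 1e-4 used elsewhere in this commit).

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Naive [N, D] layernorm reduced along D:
//   y[n][d] = (x[n][d] - mean_n) / sqrt(var_n + eps) * gamma[d] + beta[d]
void naive_layernorm2d(const std::vector<float>& x, const std::vector<float>& gamma,
                       const std::vector<float>& beta, std::vector<float>& y,
                       std::size_t N, std::size_t D, float eps = 1e-4f)
{
    for(std::size_t n = 0; n < N; ++n)
    {
        double sum = 0.0, sq = 0.0;
        for(std::size_t d = 0; d < D; ++d)
        {
            const double v = x[n * D + d];
            sum += v;
            sq += v * v;
        }
        const double mean = sum / D;
        const double var  = sq / D - mean * mean;
        const double inv  = 1.0 / std::sqrt(var + eps);
        for(std::size_t d = 0; d < D; ++d)
            y[n * D + d] = float((x[n * D + d] - mean) * inv) * gamma[d] + beta[d];
    }
}
```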
test/normalization/test_layernorm2d_util.hpp (deleted, 100644 → 0; last version at parent 37116c98)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <vector>
#include <iostream>
#include <gtest/gtest.h>

#include "ck/ck.hpp"
#include "ck/utility/number.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp"

namespace ck {

template <typename Range>
std::string serialize_range(const Range& range)
{
    std::stringstream ss;
    for(auto& r : range)
    {
        ss << r << ", ";
    }
    std::string str = ss.str();
    return std::string(str.begin(), str.end() - 2);
}

template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
    protected:
    using XDataType     = std::tuple_element_t<0, Tuple>;
    using GammaDataType = std::tuple_element_t<1, Tuple>;
    using BetaDataType  = std::tuple_element_t<2, Tuple>;
    using AccDataType   = std::tuple_element_t<3, Tuple>;
    using YDataType     = std::tuple_element_t<4, Tuple>;

    static constexpr index_t Rank               = std::tuple_element_t<5, Tuple>{}.value;
    static constexpr index_t NumReduceDim       = std::tuple_element_t<6, Tuple>{}.value;
    static constexpr index_t BlockSize          = std::tuple_element_t<7, Tuple>{}.value;
    static constexpr index_t MThreadClusterSize = std::tuple_element_t<8, Tuple>{}.value;
    static constexpr index_t KThreadClusterSize = std::tuple_element_t<9, Tuple>{}.value;
    static constexpr index_t MThreadSliceSize   = std::tuple_element_t<10, Tuple>{}.value;
    static constexpr index_t KThreadSliceSize   = std::tuple_element_t<11, Tuple>{}.value;
    static constexpr index_t XYSrcVectorDim     = std::tuple_element_t<12, Tuple>{}.value;
    static constexpr index_t XSrcVectorSize     = std::tuple_element_t<13, Tuple>{}.value;
    static constexpr index_t GammaSrcVectorDim  = std::tuple_element_t<14, Tuple>{}.value;
    static constexpr index_t GammaSrcVectorSize = std::tuple_element_t<15, Tuple>{}.value;
    static constexpr index_t BetaSrcVectorDim   = std::tuple_element_t<16, Tuple>{}.value;
    static constexpr index_t BetaSrcVectorSize  = std::tuple_element_t<17, Tuple>{}.value;
    static constexpr index_t YDstVectorSize     = std::tuple_element_t<18, Tuple>{}.value;

    using PassThrough = ck::tensor_operation::element_wise::PassThrough;

    using ReferenceInstance = tensor_operation::host::ReferenceLayernorm<XDataType,
                                                                         GammaDataType,
                                                                         BetaDataType,
                                                                         YDataType,
                                                                         AccDataType,
                                                                         PassThrough,
                                                                         Rank,
                                                                         NumReduceDim>;

    using DeviceInstance = tensor_operation::device::DeviceNormalizationImpl<XDataType,
                                                                             GammaDataType,
                                                                             BetaDataType,
                                                                             AccDataType,
                                                                             YDataType,
                                                                             PassThrough,
                                                                             Rank,
                                                                             NumReduceDim,
                                                                             BlockSize,
                                                                             MThreadClusterSize,
                                                                             KThreadClusterSize,
                                                                             MThreadSliceSize,
                                                                             KThreadSliceSize,
                                                                             XYSrcVectorDim,
                                                                             XSrcVectorSize,
                                                                             GammaSrcVectorDim,
                                                                             GammaSrcVectorSize,
                                                                             BetaSrcVectorDim,
                                                                             BetaSrcVectorSize,
                                                                             YDstVectorSize>;

    TestLayernorm2d() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}

    void RunSingle(const std::vector<index_t>& lengths,
                   const std::vector<index_t>& reduceDims,
                   const std::vector<index_t>& GammaLength,
                   const std::vector<index_t>& GammaStride,
                   const std::vector<index_t>& BetaLength,
                   const std::vector<index_t>& BetaStride)
    {
        Tensor<XDataType> x(lengths);
        Tensor<GammaDataType> gamma(GammaLength);
        Tensor<BetaDataType> beta(BetaLength);
        Tensor<YDataType> y(lengths);
        Tensor<YDataType> y_ref(lengths);

        x.GenerateTensorValue(GeneratorTensor_3<XDataType>{0, 1.0});
        gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
        beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});

        DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize());
        DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
        DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
        DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());

        x_dev.ToDevice(x.mData.data());
        gamma_dev.ToDevice(gamma.mData.data());
        beta_dev.ToDevice(beta.mData.data());

        auto device_instance = DeviceInstance{};
        auto argument_ptr    = device_instance.MakeArgumentPointer(
            lengths,
            std::vector<ck::index_t>{x.mDesc.GetStrides().begin(), x.mDesc.GetStrides().end()},
            GammaStride,
            BetaStride,
            std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
            reduceDims,
            1e-4,
            x_dev.GetDeviceBuffer(),
            gamma_dev.GetDeviceBuffer(),
            beta_dev.GetDeviceBuffer(),
            y_dev.GetDeviceBuffer(),
            PassThrough{});

        if(!device_instance.IsSupportedArgument(argument_ptr.get()))
        {
            return;
        }

        auto invoker_ptr = device_instance.MakeInvokerPointer();
        invoker_ptr->Run(argument_ptr.get());

        ref_instance_invoker_.Run(
            {x, gamma, beta, y_ref, PassThrough{}, lengths, reduceDims, 1e-4});

        y_dev.FromDevice(y.mData.data());

        bool pass;
        if(std::is_same<XDataType, int8_t>::value)
        {
            EXPECT_TRUE(pass = ck::utils::check_err(
                            y.mData, y_ref.mData, "Error: Incorrect results!", 0, 1));
        }
        else
        {
            EXPECT_TRUE(pass = ck::utils::check_err(
                            y.mData, y_ref.mData, "Error: Incorrect results d1", 1e-3, 1e-3));
        }

        if(!pass)
        {
            FAIL() << "Failure in input lengths = [" << serialize_range(lengths) << "], "
                   << "reduce dim = [" << serialize_range(reduceDims) << "].";
        }
    }

    void Run()
    {
        std::vector<std::vector<index_t>> lengths = {
            {4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};

        for(auto length : lengths)
        {
            this->RunSingle(length, {1}, {length[1]}, {0, 1}, {length[1]}, {0, 1});
        }
    }

    typename ReferenceInstance::Invoker ref_instance_invoker_;
};

} // namespace ck
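The deleted header carried every kernel-tuning knob through a gtest type tuple by pairing std::tuple_element_t with integral-constant types whose ::value is read back at compile time. A minimal self-contained sketch of that pattern, with a stand-in Number type since ck::Number's definition is not part of this diff:

```cpp
#include <cstdint>
#include <iostream>
#include <tuple>

// Stand-in for ck::Number<N>: an integral constant carried as a *type*
// so it can travel through a std::tuple of types.
template <std::int32_t N>
struct Number
{
    static constexpr std::int32_t value = N;
};

template <typename Tuple>
struct KernelConfig
{
    // Types occupy the leading tuple slots...
    using DataType = std::tuple_element_t<0, Tuple>;
    // ...and compile-time integers are recovered from the trailing slots.
    static constexpr std::int32_t BlockSize  = std::tuple_element_t<1, Tuple>{}.value;
    static constexpr std::int32_t VectorSize = std::tuple_element_t<2, Tuple>{}.value;
};

int main()
{
    using Config = KernelConfig<std::tuple<float, Number<256>, Number<8>>>;
    static_assert(Config::BlockSize == 256 && Config::VectorSize == 8);
    std::cout << Config::BlockSize << ", " << Config::VectorSize << '\n';
}
```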
test/reference_conv_fwd/reference_conv_fwd.cpp (+9 -9)

@@ -12,6 +12,7 @@
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "ck/library/utility/algorithm.hpp"
+#include "ck/library/utility/check_err.hpp"
 #include "ck/library/utility/fill.hpp"
 #include "ck/library/utility/host_tensor.hpp"
...
@@ -54,7 +55,7 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
     fill_input_op(input.begin(), input.end());
     fill_weights_op(weights.begin(), weights.end());
-    std::fill(host_output.begin(), host_output.end(), OutDataType(0.f));
+    ck::ranges::fill<OutDataType>(host_output, 0.f);

     auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                  InDataType,
...
@@ -122,7 +123,7 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
         508.5};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
-    EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
+    EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
 }

 TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
...
@@ -149,7 +150,7 @@ TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
         1323., 1323., 2002.5, 2002.5, 2038.5, 2038.5, 2074.5, 2074.5, 2110.5, 2110.5};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
-    EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
+    EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
 }

 TEST(ReferenceConvolutionFWD, Conv1DGNWC)
...
@@ -178,7 +179,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWC)
     std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
-    EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
+    EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
 }

 TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
...
@@ -207,7 +208,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
     std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
-    EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
+    EXPECT_TRUE(ck::utils::check_err(out_tensor, ref_data, "Error: incorrect results!"));
 }

 TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
...
@@ -301,7 +302,7 @@ TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
         49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4, 49.4};
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
-    EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
+    EXPECT_TRUE(ck::utils::check_err(out_tensor2, ref_data, "Error: incorrect results!"));
 }
 #endif
...
@@ -340,8 +341,7 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
     EXPECT_TRUE(ck::utils::check_err(
         out_tensor.mDesc.GetLengths(), ref_dims, "Error [case 1]: wrong output tensor dimensions!"));
-    EXPECT_TRUE(ck::utils::check_err(
-        out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"));
+    EXPECT_TRUE(
+        ck::utils::check_err(out_tensor, ref_data, "Error [case 1]: incorrect results!"));
 }

 TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
...
@@ -388,5 +388,5 @@ TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
         ref_dims,
         "Error [case 2]: wrong output tensor dimensions!"));
     EXPECT_TRUE(ck::utils::check_err(
-        out_tensor.mData, ref_data, "Error [case 2]: incorrect results!", 1e-4f, 1e-6f));
+        out_tensor, ref_data, "Error [case 2]: incorrect results!", 1e-4f, 1e-6f));
 }
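Two mechanical changes repeat through this file: zero-filling now goes through a range-based helper instead of an iterator-pair std::fill, and check_err receives the tensor itself rather than its .mData member, so the utility can accept anything range-like. A minimal standalone sketch of the same convenience pattern; the helper names here are illustrative stand-ins, not CK's actual signatures:

```cpp
#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

// Range-based fill: forwards to std::fill over begin/end, so call sites stop
// repeating the iterator pair (analogous in spirit to ck::ranges::fill).
template <typename T, typename Range>
void range_fill(Range& r, T value)
{
    std::fill(std::begin(r), std::end(r), value);
}

// Comparison that takes whole ranges; a tensor type exposing begin()/end()
// can now be passed directly instead of its underlying .mData vector.
template <typename RangeA, typename RangeB>
bool ranges_equal(const RangeA& a, const RangeB& b)
{
    return std::equal(std::begin(a), std::end(a), std::begin(b), std::end(b));
}

int main()
{
    std::vector<float> host_output(8, -1.f);
    range_fill(host_output, 0.f); // was: std::fill(begin, end, 0.f)
    assert(ranges_equal(host_output, std::vector<float>(8, 0.f)));
}
```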
test/softmax/CMakeLists.txt (+9 -9)

 add_custom_target(test_softmax)
-add_gtest_executable(test_softmax_fp32 test_softmax_fp32.cpp)
-add_gtest_executable(test_softmax_fp16 test_softmax_fp16.cpp)
-add_gtest_executable(test_softmax_int8 test_softmax_int8.cpp)
-target_link_libraries(test_softmax_fp32 PRIVATE utility)
-target_link_libraries(test_softmax_fp16 PRIVATE utility)
-target_link_libraries(test_softmax_int8 PRIVATE utility)
-add_dependencies(test_softmax test_softmax_fp32)
-add_dependencies(test_softmax test_softmax_fp16)
-add_dependencies(test_softmax test_softmax_int8)
+add_gtest_executable(test_softmax_rank3 test_softmax_rank3.cpp)
+add_gtest_executable(test_softmax_rank4 test_softmax_rank4.cpp)
+add_gtest_executable(test_softmax_interface test_softmax_interface.cpp)
+target_link_libraries(test_softmax_rank3 PRIVATE utility device_softmax_instance)
+target_link_libraries(test_softmax_rank4 PRIVATE utility device_softmax_instance)
+target_link_libraries(test_softmax_interface PRIVATE utility device_softmax_instance)
+add_dependencies(test_softmax test_softmax_rank3)
+add_dependencies(test_softmax test_softmax_rank4)
+add_dependencies(test_softmax test_softmax_interface)
test/softmax/test_softmax_fp16.cpp (deleted, 100644 → 0; last version at parent 37116c98)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include "gtest/gtest.h"
#include "test_softmax_util.hpp"

template <ck::index_t N>
using I = ck::Number<N>;

template <typename Tuple>
class TestSoftmaxFP16 : public ck::TestSoftmax<Tuple>
{
};

// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
    std::tuple<ck::half_t, float, float,      I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<8>,  I<1>, I<8>, I<4>>, // mixed precision
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<8>, I<32>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<4>, I<64>,  I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
    std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>>;
// clang-format on

TYPED_TEST_SUITE(TestSoftmaxFP16, KernelTypes);
TYPED_TEST(TestSoftmaxFP16, Test_FP16) { this->Run(); }
test/softmax/test_softmax_fp32.cpp (deleted, 100644 → 0; last version at parent 37116c98)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include "gtest/gtest.h"
#include "test_softmax_util.hpp"

template <ck::index_t N>
using I = ck::Number<N>;

template <typename Tuple>
class TestSoftmaxFP32 : public ck::TestSoftmax<Tuple>
{
};

// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
    std::tuple<float, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<4>, I<8>>, // mixed precision
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<8>, I<32>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<4>, I<64>,  I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<4>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>,  I<1>, I<4>, I<4>>,
    std::tuple<float, float, float,      I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>>;
// clang-format on

TYPED_TEST_SUITE(TestSoftmaxFP32, KernelTypes);
TYPED_TEST(TestSoftmaxFP32, Test_FP32) { this->Run(); }
test/softmax/test_softmax_int8.cpp (deleted, 100644 → 0; last version at parent 37116c98)

#include "gtest/gtest.h"
#include "test_softmax_util.hpp"

template <ck::index_t N>
using I = ck::Number<N>;

template <typename Tuple>
class TestSoftmaxINT8 : public ck::TestSoftmax<Tuple>
{
};

// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<8>, I<32>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<4>, I<64>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<8>, I<32>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<4>, I<64>,  I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
    std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>>;
// clang-format on

TYPED_TEST_SUITE(TestSoftmaxINT8, KernelTypes);
TYPED_TEST(TestSoftmaxINT8, Test_INT8) { this->Run(); }
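For int8 outputs the old harness compared against the reference with absolute tolerance 1 (see check_err(..., 0, 1) in test_softmax_util.hpp below): quantizing the fp32 softmax result onto an integer grid can legitimately round to adjacent values depending on evaluation order. A small sketch of why, assuming a simple hypothetical round-to-nearest int8 quantization with scale 127; CK's actual int8 path is not shown in this diff:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Naive float softmax followed by an illustrative int8 quantization.
std::vector<std::int8_t> softmax_int8(const std::vector<float>& x)
{
    float m = x[0];
    for(float v : x)
        m = std::max(m, v); // subtract the max for numerical stability
    float sum = 0.f;
    std::vector<float> e(x.size());
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        e[i] = std::exp(x[i] - m);
        sum += e[i];
    }
    std::vector<std::int8_t> q(x.size());
    for(std::size_t i = 0; i < x.size(); ++i)
        q[i] = static_cast<std::int8_t>(std::lround(127.f * e[i] / sum));
    return q;
}

int main()
{
    // Two mathematically equivalent evaluation orders can round to adjacent
    // integers, which is why an absolute tolerance of 1 is reasonable.
    for(std::int8_t v : softmax_int8({1.f, 2.f, 3.f}))
        std::cout << int(v) << ' ';
}
```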
test/softmax/test_softmax_interface.cpp (new file, 0 → 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <stdexcept>
#include <vector>

#include "gtest/gtest.h"
#include "test_softmax_util.hpp"

class TestSoftmaxInterface : public ::testing::Test
{
    protected:
    template <ck::index_t Rank, ck::index_t NumReduceDims>
    using SoftmaxInstance =
        ck::DeviceSoftmaxInstanceWrapper<Rank, NumReduceDims, 256, 1, 256, 1, 8, 1, 8, 8>;
};

TEST_F(TestSoftmaxInterface, IncorrectReduceDims)
{
    std::vector<ck::index_t> lengths{2, 128, 1536};
    std::vector<ck::index_t> strides{128 * 1536, 1536, 1};

    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {-1})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {3})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {0, 1})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, strides, {})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 2>{}.IsSupported(lengths, strides, {2, -1})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 2>{}.IsSupported(lengths, strides, {2, 4})),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 2>{}.IsSupported(lengths, strides, {2})),
                 std::runtime_error);
}

TEST_F(TestSoftmaxInterface, IncorrectLengthsSize)
{
    std::vector<ck::index_t> lengths{128, 1536};
    std::vector<ck::index_t> strides{128 * 1536, 1536, 1};
    std::vector<ck::index_t> reduce_dims{2};

    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported({128, 1536}, strides, reduce_dims)),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported({}, strides, reduce_dims)),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported({1, 8, 128, 1536}, strides, reduce_dims)),
                 std::runtime_error);
}

TEST_F(TestSoftmaxInterface, IncorrectStridesSize)
{
    std::vector<ck::index_t> lengths{2, 128, 1536};
    std::vector<ck::index_t> reduce_dims{2};

    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, {1536, 1}, reduce_dims)),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, {}, reduce_dims)),
                 std::runtime_error);
    EXPECT_THROW((SoftmaxInstance<3, 1>{}.IsSupported(lengths, {1, 2, 3, 4}, reduce_dims)),
                 std::runtime_error);
}

TEST_F(TestSoftmaxInterface, UnsupportedLengths)
{
    using SoftmaxInstance1 = ck::DeviceSoftmaxInstanceWrapper<3, 1, 256, 1, 256, 1, 8, 1, 8, 4>;
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 128, 1500}, {128 * 1500, 1500, 1}, {2}));
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 127, 1536}, {127 * 1536, 1536, 1}, {2}));
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 128, 1537}, {128 * 1537, 1537, 1}, {2}));

    // Reduction of middle dimensions
    using SoftmaxInstance2 = ck::DeviceSoftmaxInstanceWrapper<3, 3, 256, 8, 32, 8, 8, 0, 8, 4>;
    EXPECT_FALSE(SoftmaxInstance2{}.IsSupported({2, 128, 1536}, {128 * 1536, 1536, 1}, {0, 1, 2}));

    // Reduction of middle dimensions
    using SoftmaxInstance3 = ck::DeviceSoftmaxInstanceWrapper<3, 1, 256, 8, 32, 8, 8, 0, 4, 8>;
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 128, 1536}, {128 * 1536, 1536, 1}, {2}));
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 128, 1537}, {128 * 1537, 1537, 1}, {1}));
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 128, 1540}, {128 * 1540, 1540, 1}, {1}));
    EXPECT_FALSE(SoftmaxInstance3{}.IsSupported({2, 127, 1536}, {127 * 1536, 1536, 1}, {1}));
}

TEST_F(TestSoftmaxInterface, UnsupportedInstance)
{
    // Instance with InSrcVectorDim = 1, can't reduce middle dims if in/out vec size != 1
    using SoftmaxInstance1 = ck::DeviceSoftmaxInstanceWrapper<3, 1, 256, 8, 32, 1, 8, 1, 8, 8>;
    EXPECT_FALSE(SoftmaxInstance1{}.IsSupported({2, 128, 1024}, {128 * 1024, 1024, 1}, {0}));
}
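Note the extra parentheses inside each EXPECT_THROW above: gtest assertion macros split their arguments on top-level commas during preprocessing, so a template instantiation like SoftmaxInstance<3, 1> must be wrapped in an additional pair of parentheses to be treated as one argument. A self-contained illustration of the gotcha, with a placeholder Checker type:

```cpp
#include <stdexcept>
#include "gtest/gtest.h"

template <int Rank, int NumReduceDims>
struct Checker
{
    void Validate(int dim) const
    {
        if(dim < 0 || dim >= Rank)
            throw std::runtime_error("bad reduce dim");
    }
};

TEST(MacroCommaDemo, NeedsExtraParens)
{
    // Without the outer parentheses, the preprocessor would treat
    // "Checker<3" and "1>{}.Validate(3)" as two separate macro arguments.
    EXPECT_THROW((Checker<3, 1>{}.Validate(3)), std::runtime_error);
}
```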
test/softmax/test_softmax_rank3.cpp (new file, 0 → 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>
#include <stdexcept>
#include <vector>

#include "gtest/gtest.h"
#include "test_softmax_util.hpp"

template <ck::index_t N>
using I = ck::Number<N>;

using F16 = ck::half_t;
using F32 = float;
using I8  = int8_t;

template <typename Tuple>
class TestSoftmax : public ck::TestSoftmax<Tuple>
{
};

// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank
    std::tuple<F16, F32, F16, I<3>>,
    std::tuple<F32, F32, F32, I<3>>,
    std::tuple<I8,  F32, I8,  I<3>>>;
// clang-format on

TYPED_TEST_SUITE(TestSoftmax, KernelTypes);

#include "test_softmax_ut_cases.inc"
test/softmax/test_softmax_rank4.cpp (new file, 0 → 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>
#include <stdexcept>
#include <vector>

#include "gtest/gtest.h"
#include "test_softmax_util.hpp"

template <ck::index_t N>
using I = ck::Number<N>;

using F16 = ck::half_t;
using F32 = float;
using I8  = int8_t;

template <typename Tuple>
class TestSoftmax : public ck::TestSoftmax<Tuple>
{
};

// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank
    std::tuple<F16, F32, F16, I<4>>,
    std::tuple<F32, F32, F32, I<4>>,
    std::tuple<I8,  F32, I8,  I<4>>>;
// clang-format on

TYPED_TEST_SUITE(TestSoftmax, KernelTypes);

#include "test_softmax_ut_cases.inc"
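The rank3 and rank4 files differ only in the Rank slot of KernelTypes; the TYPED_TEST case bodies live once in test_softmax_ut_cases.inc and are textually included after TYPED_TEST_SUITE, so each translation unit stamps the same cases out against its own type list. A minimal standalone sketch of this include-the-cases pattern; the fixture and test names here are illustrative:

```cpp
#include "gtest/gtest.h"

// Fixture shared by several translation units; each unit supplies its own
// type list before pulling in the common case file.
template <typename T>
class MyTest : public ::testing::Test
{
    protected:
    void Check() { EXPECT_EQ(T{} + T{}, T{}); }
};

using KernelTypes = ::testing::Types<int, float, double>;
TYPED_TEST_SUITE(MyTest, KernelTypes);

// In the real layout this body would live in a shared "my_test_cases.inc"
// included here with: #include "my_test_cases.inc"
TYPED_TEST(MyTest, ZeroPlusZeroIsZero) { this->Check(); }
```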
test/softmax/test_softmax_ut_cases.inc (new file, 0 → 100644)

#pragma once

TYPED_TEST(TestSoftmax, ReduceOutermostDim)
{
    std::vector<ck::index_t> reduce_dims{this->Rank - 1};
    this->Run(reduce_dims);
}

TYPED_TEST(TestSoftmax, ReduceMiddleDim)
{
    for(int dim = 0; dim < this->Rank - 1; ++dim)
    {
        std::vector<ck::index_t> reduce_dims{dim};
        this->Run(reduce_dims);
    }
}

TYPED_TEST(TestSoftmax, ReduceMultipleDimsWithOutermost)
{
    for(int dim = 0; dim < this->Rank - 1; ++dim)
    {
        std::vector<ck::index_t> reduce_dims{dim, this->Rank - 1};
        this->Run(reduce_dims);
    }
}

TYPED_TEST(TestSoftmax, ReduceMultipleMiddleDims)
{
    std::vector<ck::index_t> reduce_dims{0, 1};
    if(this->Rank >= 3)
    {
        this->Run(reduce_dims);
    }
    if(this->Rank >= 4)
    {
        reduce_dims = std::vector<ck::index_t>{0, 2};
        this->Run(reduce_dims);
        reduce_dims = std::vector<ck::index_t>{0, 1, 2};
        this->Run(reduce_dims);
    }
}

TYPED_TEST(TestSoftmax, ReduceAllDims)
{
    std::vector<ck::index_t> reduce_dims(this->Rank);
    std::iota(std::begin(reduce_dims), std::end(reduce_dims), 0);
    this->Run(reduce_dims);
}

TYPED_TEST(TestSoftmax, ReduceOddLengths)
{
    this->in_lengths_ = {{3, 63, 1032}};
    if(this->Rank >= 4)
    {
        this->in_lengths_ = {{1, 3, 63, 1032}};
    }
    this->Run({this->Rank - 1});
    this->Run({this->Rank - 2});
}
test/softmax/test_softmax_util.hpp (+94 -112)

@@ -3,19 +3,17 @@
 #pragma once

 #include <string>
 #include <sstream>
 #include <tuple>
 #include <vector>
 #include <iostream>
 #include <gtest/gtest.h>

 #include "ck/ck.hpp"
 #include "ck/utility/number.hpp"
 #include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
 #include "ck/library/utility/check_err.hpp"
 #include "ck/library/utility/host_tensor.hpp"
 #include "ck/library/utility/device_memory.hpp"
 #include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
 #include "include/ck/utility/data_type.hpp"
 #include "profiler/include/profile_softmax_impl.hpp"

 namespace ck {

@@ -35,126 +33,110 @@ template <typename Tuple>
class TestSoftmax : public ::testing::Test — rewritten in place.

Removed (the old harness, which carried all tuning parameters in the tuple and drove DeviceSoftmaxImpl plus the host reference directly):

    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using AccDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    static constexpr index_t Rank               = std::tuple_element_t<3, Tuple>{}.value;
    static constexpr index_t NumReduceDim       = std::tuple_element_t<4, Tuple>{}.value;
    static constexpr index_t BlockSize          = std::tuple_element_t<5, Tuple>{}.value;
    static constexpr index_t MThreadClusterSize = std::tuple_element_t<6, Tuple>{}.value;
    static constexpr index_t KThreadClusterSize = std::tuple_element_t<7, Tuple>{}.value;
    static constexpr index_t MThreadSliceSize   = std::tuple_element_t<8, Tuple>{}.value;
    static constexpr index_t KThreadSliceSize   = std::tuple_element_t<9, Tuple>{}.value;
    static constexpr index_t InSrcVectorDim     = std::tuple_element_t<10, Tuple>{}.value;
    static constexpr index_t InSrcVectorSize    = std::tuple_element_t<11, Tuple>{}.value;
    static constexpr index_t OutDstVectorSize   = std::tuple_element_t<12, Tuple>{}.value;

    using ReferenceInstance =
        tensor_operation::host::ReferenceSoftmax<InDataType, OutDataType, AccDataType>;
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;
    using DeviceInstance =
        tensor_operation::device::DeviceSoftmaxImpl<InDataType, AccDataType, OutDataType,
                                                    PassThrough, PassThrough, Rank, NumReduceDim,
                                                    BlockSize, MThreadClusterSize,
                                                    KThreadClusterSize, MThreadSliceSize,
                                                    KThreadSliceSize, InSrcVectorDim,
                                                    InSrcVectorSize, OutDstVectorSize>;

    TestSoftmax() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}

    void RunSingle(std::vector<index_t> in_length, AccDataType alpha, AccDataType beta)
    {
        std::vector<index_t> reduce_dims(NumReduceDim);
        std::iota(reduce_dims.begin(), reduce_dims.end(), Rank - NumReduceDim);

        Tensor<InDataType> in(in_length);
        Tensor<OutDataType> out(in_length);

        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
        out.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
        Tensor<OutDataType> out_ref(out);

        DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
        DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpaceSize());
        in_dev.ToDevice(in.mData.data());
        out_dev.ToDevice(out.mData.data());

        std::vector<index_t> i_in_lengths(in.mDesc.GetLengths().begin(),
                                          in.mDesc.GetLengths().end());
        std::vector<index_t> i_in_strides(in.mDesc.GetStrides().begin(),
                                          in.mDesc.GetStrides().end());

        auto device_instance = DeviceInstance{};
        auto argument_ptr    = device_instance.MakeArgumentPointer(i_in_lengths,
                                                                   i_in_strides,
                                                                   reduce_dims,
                                                                   &alpha,
                                                                   &beta,
                                                                   in_dev.GetDeviceBuffer(),
                                                                   out_dev.GetDeviceBuffer(),
                                                                   PassThrough{},
                                                                   PassThrough{});

        if(!device_instance.IsSupportedArgument(argument_ptr.get()))
        {
            // std::cout << "Skipped due to unsupported argument: "
            //           << "input lengths = [" << serialize_range(in_length) << "], "
            //           << "scaler = [" << alpha << ", " << beta << "]." << std::endl;
            return;
        }

        auto invoker_ptr = device_instance.MakeInvokerPointer();
        invoker_ptr->Run(argument_ptr.get());

        ref_instance_invoker_.Run({in, out_ref, alpha, beta, reduce_dims});

        out_dev.FromDevice(out.mData.data());

        bool pass;
        if(std::is_same<InDataType, int8_t>::value)
        {
            EXPECT_TRUE(pass = ck::utils::check_err(
                            out.mData, out_ref.mData, "Error: Incorrect results!", 0, 1));
        }
        else
        {
            EXPECT_TRUE(pass = ck::utils::check_err(out.mData, out_ref.mData));
        }

        if(!pass)
        {
            FAIL() << "Failure in input lengths = [" << serialize_range(in_length) << "], "
                   << "scaler = [" << alpha << ", " << beta << "].";
        }
    }

    void Run()
    {
        for(auto in_length : this->in_lengths_)
        {
            for(auto scale : this->scales_)
            {
                this->RunSingle(in_length, scale[0], scale[1]);
            }
        }
    }

    std::vector<std::vector<index_t>> in_lengths_ = {
        {1, 8, 128}, {2, 128, 1024}, {3, 9, 1032}, {4, 4, 2048}, {8, 1, 8192}};
    std::vector<std::vector<AccDataType>> scales_ = {{1, 0}, {1, 1}, {0, 1}, {2, 2}};

    typename ReferenceInstance::Invoker ref_instance_invoker_;

Added (the new harness, which keeps only the data types and rank in the tuple and delegates execution and verification to the profiler):

    protected:
    using InDataType  = std::tuple_element_t<0, Tuple>;
    using AccDataType = std::tuple_element_t<1, Tuple>;
    using OutDataType = std::tuple_element_t<2, Tuple>;
    static constexpr index_t Rank = std::tuple_element_t<3, Tuple>{}.value;

    public:
    std::vector<std::vector<index_t>> in_lengths_ = {
        {2, 128, 1024}, {4, 16, 8448}, {128, 128, 64}};
    std::vector<std::vector<AccDataType>> scales_ = {{2, 0}, {0, 2}, {2, 2}};
    bool bench_  = false; // measure kernel performance
    bool verify_ = true;

    void SetUp() override
    {
        if constexpr(Rank == 4)
        {
            in_lengths_ = std::vector<std::vector<index_t>>{
                {1, 2, 128, 1024}, {2, 4, 16, 8448}, {1, 128, 128, 64}};
        }
    }

    void RunSingle(std::vector<index_t> in_length,
                   std::vector<index_t> reduce_dims,
                   AccDataType alpha,
                   AccDataType beta)
    {
        int init_method = 1; // integer value initialization
        bool log        = false;
        std::vector<ck::index_t> strides; // intentionally empty, to get packed layout
        bool pass = ck::profiler::profile_softmax_impl<InDataType, AccDataType, OutDataType, Rank>(
            verify_, init_method, log, bench_, in_length, strides, reduce_dims, alpha, beta);
        EXPECT_TRUE(pass);
    }

    void Run(std::vector<index_t> reduce_dims = {})
    {
        if(reduce_dims.empty())
        {
            reduce_dims.push_back(Rank - 1);
        }

        for(auto in_length : this->in_lengths_)
        {
            for(auto scale : this->scales_)
            {
                this->RunSingle(in_length, reduce_dims, scale[0], scale[1]);
            }
        }
    }
};

Also added: a thin wrapper used by the new interface tests.

template <index_t Rank,
          index_t NumReduceDim,
          index_t BlockSize,
          index_t MThreadClusterSize,
          index_t KThreadClusterSize,
          index_t MThreadSliceSize,
          index_t KThreadSliceSize,
          index_t InSrcVectorDim,
          index_t InSrcVectorSize,
          index_t OutDstVectorSize>
struct DeviceSoftmaxInstanceWrapper
{
    using F16  = half_t;
    using F32  = float;
    using Pass = tensor_operation::element_wise::PassThrough;

    using InDataType   = F16;
    using AccDataType  = F32;
    using OutDataType  = F16;
    using InElementOp  = Pass;
    using AccElementOp = Pass;

    using DeviceSoftmaxInstance =
        tensor_operation::device::DeviceSoftmaxImpl<InDataType, AccDataType, OutDataType,
                                                    InElementOp, AccElementOp, Rank, NumReduceDim,
                                                    BlockSize, MThreadClusterSize,
                                                    KThreadClusterSize, MThreadSliceSize,
                                                    KThreadSliceSize, InSrcVectorDim,
                                                    InSrcVectorSize, OutDstVectorSize>;

    bool IsSupported(const std::vector<index_t> in_lengths,
                     const std::vector<index_t> in_strides,
                     const std::vector<index_t> reduce_dims) const
    {
        auto softmax  = DeviceSoftmaxInstance{};
        auto argument = softmax.MakeArgument(in_lengths,
                                             in_strides,
                                             reduce_dims,
                                             1,       // alpha
                                             1,       // beta
                                             nullptr, // in_dev
                                             nullptr, // in_out
                                             Pass{},  // in elementwise op
                                             Pass{}); // acc elementwise op
        return softmax.IsSupportedArgument(argument);
    }
};

} // namespace ck
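The scales_ pairs exercise the kernel's alpha/beta blending: the device softmax computes out = alpha * softmax(in) + beta * out_prior, so {2, 0} tests pure scaling, {0, 2} pure accumulation into existing output, and {2, 2} both at once. A naive single-axis reference of that contract in plain C++; the flat [M, K] row-major layout and loop structure are illustrative assumptions:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// out = alpha * softmax(in) + beta * out, with softmax taken over the last
// axis of an [M, K] row-major buffer; mirrors the alpha/beta semantics that
// the scales_ test data exercises.
void softmax_alpha_beta(const std::vector<float>& in, std::vector<float>& out,
                        std::size_t M, std::size_t K, float alpha, float beta)
{
    for(std::size_t m = 0; m < M; ++m)
    {
        const float* row = &in[m * K];
        float mx = row[0];
        for(std::size_t k = 1; k < K; ++k)
            mx = std::max(mx, row[k]); // max-subtraction for stability
        float sum = 0.f;
        std::vector<float> e(K);
        for(std::size_t k = 0; k < K; ++k)
        {
            e[k] = std::exp(row[k] - mx);
            sum += e[k];
        }
        for(std::size_t k = 0; k < K; ++k)
            out[m * K + k] = alpha * (e[k] / sum) + beta * out[m * K + k];
    }
}
```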