gaoqiong / composable_kernel

Commit cef3d91f, authored Sep 19, 2022 by rocking

Test groupnorm kernel from device_instance

Parent: cb2b5c86

Showing 4 changed files with 108 additions and 210 deletions (+108, -210)
test/layernorm/CMakeLists.txt            +5   -1
test/layernorm/test_groupnorm_fp16.cpp   +46  -18
test/layernorm/test_groupnorm_fp32.cpp   +57  -0
test/layernorm/test_groupnorm_util.hpp   +0   -191
test/layernorm/CMakeLists.txt
@@ -3,11 +3,15 @@ add_custom_target(test_layernorm)
 add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
 add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
 add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
+add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
 
 target_link_libraries(test_layernorm2d_fp32 PRIVATE utility)
 target_link_libraries(test_layernorm2d_fp16 PRIVATE utility)
-target_link_libraries(test_groupnorm_fp16 PRIVATE utility)
+target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
+target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
 
 add_dependencies(test_layernorm test_layernorm2d_fp32)
 add_dependencies(test_layernorm test_layernorm2d_fp16)
 add_dependencies(test_layernorm test_groupnorm_fp16)
+add_dependencies(test_layernorm test_groupnorm_fp32)
test/layernorm/test_groupnorm_fp16.cpp
@@ -2,28 +2,56 @@
 // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "gtest/gtest.h"
-#include "test_groupnorm_util.hpp"
+#include "profiler/include/profile_groupnorm_impl.hpp"
 
-template <ck::index_t N>
-using I = ck::Number<N>;
+using F16 = ck::half_t;
+using F32 = float;
+
+using ck::index_t;
+using ck::profiler::ElementwiseOpEnum;
 
 template <typename Tuple>
-class TestGroupnormFP16 : public ck::TestGroupnorm<Tuple>
+class TestGroupnorm : public ::testing::Test
 {
+    protected:
+    using XDataType     = std::tuple_element_t<0, Tuple>;
+    using GammaDataType = std::tuple_element_t<1, Tuple>;
+    using BetaDataType  = std::tuple_element_t<2, Tuple>;
+    using AccDataType   = std::tuple_element_t<3, Tuple>;
+    using YDataType     = std::tuple_element_t<4, Tuple>;
+
+    void Run()
+    {
+        // N, H, W, G, C
+        std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
+                                                         {1, 2, 3, 4, 5},
+                                                         {256, 9, 9, 9, 9},
+                                                         {1, 64, 64, 32, 10},
+                                                         {1, 32, 32, 32, 20},
+                                                         {1, 16, 16, 32, 40}};
+
+        for(auto length : lengths)
+        {
+            bool success = ck::profiler::profile_groupnorm_impl<XDataType,
+                                                                GammaDataType,
+                                                                BetaDataType,
+                                                                AccDataType,
+                                                                YDataType>(
+                true, 2, false, false, length, ElementwiseOpEnum::eSigmoid);
+            EXPECT_TRUE(success);
+        }
+    }
 };
 
 // clang-format off
 using KernelTypes = ::testing::Types<
-    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize>
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<8>, I<32>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<4>, I<64>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<2>, I<128>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>,
-    std::tuple<ck::half_t, ck::half_t, ck::half_t, float, ck::half_t, I<5>, I<3>, I<256>, I<1>, I<256>, I<2>, I<8>, I<1>, I<8>, I<1>, I<8>, I<1>, I<8>, I<8>>
-    >;
+    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>,
+    std::tuple<F16, F16, F16, F32, F16>>;
 // clang-format on
 
-TYPED_TEST_SUITE(TestGroupnormFP16, KernelTypes);
+TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);
 
-TYPED_TEST(TestGroupnormFP16, Test_FP16) { this->Run(); }
+TYPED_TEST(TestGroupnorm, Test_FP16) { this->Run(); }
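For reference, the new test body boils down to a single call into the profiler per shape; a minimal standalone sketch is shown below. It is not part of this commit, and the meanings of the four positional flags (do_verification, init_method, do_log, time_kernel) are assumed here by analogy with other composable_kernel profiler entry points rather than stated in the diff.

// Hypothetical standalone driver (not part of this commit). Assumes the
// profiler header added above; the flag meanings are assumptions.
#include "profiler/include/profile_groupnorm_impl.hpp"

#include <vector>

int main()
{
    std::vector<ck::index_t> length{1, 16, 16, 32, 40}; // N, H, W, G, C

    bool ok =
        ck::profiler::profile_groupnorm_impl<ck::half_t, // XDataType
                                             ck::half_t, // GammaDataType
                                             ck::half_t, // BetaDataType
                                             float,      // AccDataType
                                             ck::half_t> // YDataType
        (true,  // assumed: do_verification
         2,     // assumed: init_method
         false, // assumed: do_log
         false, // assumed: time_kernel
         length,
         ck::profiler::ElementwiseOpEnum::eSigmoid);

    return ok ? 0 : 1;
}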
test/layernorm/test_groupnorm_fp32.cpp (new file, 0 → 100644)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/include/profile_groupnorm_impl.hpp"

using F16 = ck::half_t;
using F32 = float;

using ck::index_t;
using ck::profiler::ElementwiseOpEnum;

template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
    protected:
    using XDataType     = std::tuple_element_t<0, Tuple>;
    using GammaDataType = std::tuple_element_t<1, Tuple>;
    using BetaDataType  = std::tuple_element_t<2, Tuple>;
    using AccDataType   = std::tuple_element_t<3, Tuple>;
    using YDataType     = std::tuple_element_t<4, Tuple>;

    void Run()
    {
        // N, H, W, G, C
        std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
                                                         {1, 2, 3, 4, 5},
                                                         {256, 9, 9, 9, 9},
                                                         {1, 64, 64, 32, 10},
                                                         {1, 32, 32, 32, 20},
                                                         {1, 16, 16, 32, 40}};

        for(auto length : lengths)
        {
            bool success = ck::profiler::profile_groupnorm_impl<XDataType,
                                                                GammaDataType,
                                                                BetaDataType,
                                                                AccDataType,
                                                                YDataType>(
                true, 2, false, false, length, ElementwiseOpEnum::eSigmoid);
            EXPECT_TRUE(success);
        }
    }
};

using KernelTypes = ::testing::Types<
    // XDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>,
    std::tuple<F32, F32, F32, F32, F32>>;

TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);

TYPED_TEST(TestGroupnorm, Test_FP16) { this->Run(); }
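Both test files rely on GoogleTest's typed-test machinery: every std::tuple listed in ::testing::Types becomes one instantiation of the fixture, and each TYPED_TEST runs once per instantiation. A minimal self-contained illustration of that pattern (not part of the commit; the fixture and type names below are made up), built against gtest_main:

#include <gtest/gtest.h>

#include <tuple>

template <typename Tuple>
class TypedFixtureDemo : public ::testing::Test
{
    protected:
    using XDataType = std::tuple_element_t<0, Tuple>;

    void Run() { EXPECT_GE(sizeof(XDataType), 1u); }
};

// One fixture instantiation per tuple; RunsPerTuple executes twice.
using DemoTypes = ::testing::Types<std::tuple<float>, std::tuple<double>>;
TYPED_TEST_SUITE(TypedFixtureDemo, DemoTypes);

TYPED_TEST(TypedFixtureDemo, RunsPerTuple) { this->Run(); }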
test/layernorm/test_groupnorm_util.hpp (deleted, 100644 → 0)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include <iostream>
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/utility/number.hpp"
#include "ck/tensor_operation/gpu/device/device_layernorm_impl.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp"

namespace ck {

template <typename Range>
std::string serialize_range(const Range& range)
{
    std::stringstream ss;
    for(auto& r : range)
    {
        ss << r << ", ";
    }
    std::string str = ss.str();
    return std::string(str.begin(), str.end() - 2);
}

template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
    protected:
    using XDataType     = std::tuple_element_t<0, Tuple>;
    using GammaDataType = std::tuple_element_t<1, Tuple>;
    using BetaDataType  = std::tuple_element_t<2, Tuple>;
    using AccDataType   = std::tuple_element_t<3, Tuple>;
    using YDataType     = std::tuple_element_t<4, Tuple>;

    static constexpr index_t Rank               = std::tuple_element_t<5, Tuple>{}.value;
    static constexpr index_t NumReduceDim       = std::tuple_element_t<6, Tuple>{}.value;
    static constexpr index_t BlockSize          = std::tuple_element_t<7, Tuple>{}.value;
    static constexpr index_t MThreadClusterSize = std::tuple_element_t<8, Tuple>{}.value;
    static constexpr index_t KThreadClusterSize = std::tuple_element_t<9, Tuple>{}.value;
    static constexpr index_t MThreadSliceSize   = std::tuple_element_t<10, Tuple>{}.value;
    static constexpr index_t KThreadSliceSize   = std::tuple_element_t<11, Tuple>{}.value;
    static constexpr index_t XYSrcVectorDim     = std::tuple_element_t<12, Tuple>{}.value;
    static constexpr index_t XSrcVectorSize     = std::tuple_element_t<13, Tuple>{}.value;
    static constexpr index_t GammaSrcVectorDim  = std::tuple_element_t<14, Tuple>{}.value;
    static constexpr index_t GammaSrcVectorSize = std::tuple_element_t<15, Tuple>{}.value;
    static constexpr index_t BetaSrcVectorDim   = std::tuple_element_t<16, Tuple>{}.value;
    static constexpr index_t BetaSrcVectorSize  = std::tuple_element_t<17, Tuple>{}.value;
    static constexpr index_t YDstVectorSize     = std::tuple_element_t<18, Tuple>{}.value;

    using Sigmoid = ck::tensor_operation::element_wise::PassThrough;

    using ReferenceInstance = tensor_operation::host::ReferenceGroupnorm<XDataType,
                                                                         GammaDataType,
                                                                         BetaDataType,
                                                                         YDataType,
                                                                         AccDataType,
                                                                         Sigmoid>;

    using DeviceInstance = tensor_operation::device::DeviceLayernormImpl<XDataType,
                                                                         GammaDataType,
                                                                         BetaDataType,
                                                                         AccDataType,
                                                                         YDataType,
                                                                         Sigmoid,
                                                                         Rank,
                                                                         NumReduceDim,
                                                                         BlockSize,
                                                                         MThreadClusterSize,
                                                                         KThreadClusterSize,
                                                                         MThreadSliceSize,
                                                                         KThreadSliceSize,
                                                                         XYSrcVectorDim,
                                                                         XSrcVectorSize,
                                                                         GammaSrcVectorDim,
                                                                         GammaSrcVectorSize,
                                                                         BetaSrcVectorDim,
                                                                         BetaSrcVectorSize,
                                                                         YDstVectorSize>;

    TestGroupnorm() : ref_instance_invoker_(ReferenceInstance{}.MakeInvoker()) {}

    void RunSingle(const std::vector<index_t>& lengths,
                   const std::vector<index_t>& reduceDims,
                   const std::vector<index_t>& GammaLength,
                   const std::vector<index_t>& GammaStride,
                   const std::vector<index_t>& BetaLength,
                   const std::vector<index_t>& BetaStride)
    {
        Tensor<XDataType> x(lengths);
        Tensor<GammaDataType> gamma(GammaLength);
        Tensor<BetaDataType> beta(BetaLength);
        Tensor<YDataType> y(lengths);
        Tensor<YDataType> y_ref(lengths);

        x.GenerateTensorValue(GeneratorTensor_3<XDataType>{0, 1.0});
        gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
        beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});

        DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize());
        DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
        DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
        DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());

        x_dev.ToDevice(x.mData.data());
        gamma_dev.ToDevice(gamma.mData.data());
        beta_dev.ToDevice(beta.mData.data());

        auto device_instance = DeviceInstance{};
        auto argument_ptr    = device_instance.MakeArgumentPointer(
            lengths,
            std::vector<ck::index_t>{x.mDesc.GetStrides().begin(), x.mDesc.GetStrides().end()},
            GammaStride,
            BetaStride,
            std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
            reduceDims,
            1e-6,
            x_dev.GetDeviceBuffer(),
            gamma_dev.GetDeviceBuffer(),
            beta_dev.GetDeviceBuffer(),
            y_dev.GetDeviceBuffer(),
            Sigmoid{});

        if(!device_instance.IsSupportedArgument(argument_ptr.get()))
        {
            return;
        }

        auto invoker_ptr = device_instance.MakeInvokerPointer();
        invoker_ptr->Run(argument_ptr.get());

        ref_instance_invoker_.Run({x, gamma, beta, y_ref, Sigmoid{}, lengths, 1e-6});

        y_dev.FromDevice(y.mData.data());

        bool pass;
        if(std::is_same<XDataType, int8_t>::value)
        {
            EXPECT_TRUE(pass = ck::utils::check_err(
                            y.mData, y_ref.mData, "Error: Incorrect results!", 0, 1));
        }
        else
        {
            EXPECT_TRUE(pass = ck::utils::check_err(
                            y.mData, y_ref.mData, "Error: Incorrect results", 1e-3, 1e-3));
        }

        if(!pass)
        {
            FAIL() << "Failure in input lengths = [" << serialize_range(lengths) << "], "
                   << "reduce dim = [" << serialize_range(reduceDims) << "].";
        }
    }

    void Run()
    {
        // N, H, W, G, C
        std::vector<std::vector<index_t>> lengths = {{1, 1, 1, 1, 1},
                                                     {1, 2, 3, 4, 5},
                                                     {256, 9, 9, 9, 9},
                                                     {1, 64, 64, 32, 10},
                                                     {1, 32, 32, 32, 20},
                                                     {1, 16, 16, 32, 40}};

        // reduceDims = [H, W, C]
        std::vector<index_t> reduceDims = {1, 2, 4};

        for(auto length : lengths)
        {
            index_t G = length[3];
            index_t C = length[4];

            std::vector<index_t> gammaBetaLength = {G, C};
            std::vector<index_t> gammaBetaStride = {0, 0, 0, C, 1};

            this->RunSingle(length,
                            reduceDims,
                            gammaBetaLength,
                            gammaBetaStride,
                            gammaBetaLength,
                            gammaBetaStride);
        }
    }

    typename ReferenceInstance::Invoker ref_instance_invoker_;
};

} // namespace ck