Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cba8f7f2
Commit
cba8f7f2
authored
Jun 26, 2022
by
Anthony Chang
Browse files
Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4
parents
cc50b687
b653c5eb
Changes
583
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
153 additions
and
1 deletion
+153
-1
test/softmax/test_softmax_fp32.cpp
test/softmax/test_softmax_fp32.cpp
+29
-0
test/softmax/test_softmax_util.hpp
test/softmax/test_softmax_util.hpp
+118
-0
test/space_filling_curve/space_filling_curve.cpp
test/space_filling_curve/space_filling_curve.cpp
+6
-1
No files found.
test/softmax/test_softmax_fp32.cpp
0 → 100644
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template
<
ck
::
index_t
N
>
using
I
=
ck
::
Number
<
N
>
;
template
<
typename
Tuple
>
class
TestSoftmaxFP32
:
public
ck
::
TestSoftmax
<
Tuple
>
{
};
// clang-format off
using
KernelTypes
=
::
testing
::
Types
<
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
1
>
,
I
<
256
>
,
I
<
8
>
,
I
<
32
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
1
>
,
I
<
256
>
,
I
<
4
>
,
I
<
64
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
1
>
,
I
<
256
>
,
I
<
2
>
,
I
<
128
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
1
>
,
I
<
256
>
,
I
<
1
>
,
I
<
256
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
2
>
,
I
<
256
>
,
I
<
8
>
,
I
<
32
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
2
>
,
I
<
256
>
,
I
<
4
>
,
I
<
64
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
2
>
,
I
<
256
>
,
I
<
2
>
,
I
<
128
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
,
std
::
tuple
<
float
,
float
,
float
,
I
<
3
>
,
I
<
2
>
,
I
<
256
>
,
I
<
1
>
,
I
<
256
>
,
I
<
1
>
,
I
<
4
>
,
I
<
1
>
,
I
<
4
>
,
I
<
4
>>
>
;
// clang-format on
TYPED_TEST_SUITE
(
TestSoftmaxFP32
,
KernelTypes
);
TYPED_TEST
(
TestSoftmaxFP32
,
Test_FP32
)
{
this
->
Run
();
}
test/softmax/test_softmax_util.hpp
0 → 100644
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include <iostream>
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/utility/number.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
namespace
ck
{
template
<
typename
Tuple
>
class
TestSoftmax
:
public
::
testing
::
Test
{
protected:
using
InDataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
OutDataType
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
static
constexpr
index_t
Rank
=
std
::
tuple_element_t
<
3
,
Tuple
>
{}.
value
;
static
constexpr
index_t
NumReduceDim
=
std
::
tuple_element_t
<
4
,
Tuple
>
{}.
value
;
static
constexpr
index_t
BlockSize
=
std
::
tuple_element_t
<
5
,
Tuple
>
{}.
value
;
static
constexpr
index_t
MThreadClusterSize
=
std
::
tuple_element_t
<
6
,
Tuple
>
{}.
value
;
static
constexpr
index_t
KThreadClusterSize
=
std
::
tuple_element_t
<
7
,
Tuple
>
{}.
value
;
static
constexpr
index_t
MThreadSliceSize
=
std
::
tuple_element_t
<
8
,
Tuple
>
{}.
value
;
static
constexpr
index_t
KThreadSliceSize
=
std
::
tuple_element_t
<
9
,
Tuple
>
{}.
value
;
static
constexpr
index_t
InSrcVectorDim
=
std
::
tuple_element_t
<
10
,
Tuple
>
{}.
value
;
static
constexpr
index_t
InSrcVectorSize
=
std
::
tuple_element_t
<
11
,
Tuple
>
{}.
value
;
static
constexpr
index_t
OutDstVectorSize
=
std
::
tuple_element_t
<
12
,
Tuple
>
{}.
value
;
using
ReferenceInstance
=
tensor_operation
::
host
::
ReferenceSoftmax
<
InDataType
,
OutDataType
,
AccDataType
>
;
using
DeviceInstance
=
tensor_operation
::
device
::
DeviceSoftmax
<
InDataType
,
AccDataType
,
OutDataType
,
Rank
,
NumReduceDim
,
BlockSize
,
MThreadClusterSize
,
KThreadClusterSize
,
MThreadSliceSize
,
KThreadSliceSize
,
InSrcVectorDim
,
InSrcVectorSize
,
OutDstVectorSize
>
;
TestSoftmax
()
:
ref_instance_invoker_
(
ReferenceInstance
{}.
MakeInvoker
())
{}
void
RunSingle
(
std
::
vector
<
index_t
>
in_length
,
AccDataType
alpha
,
AccDataType
beta
)
{
std
::
vector
<
index_t
>
reduce_dims
(
NumReduceDim
);
std
::
iota
(
reduce_dims
.
begin
(),
reduce_dims
.
end
(),
Rank
-
NumReduceDim
);
Tensor
<
InDataType
>
in
(
in_length
);
Tensor
<
OutDataType
>
out
(
in_length
);
in
.
GenerateTensorValue
(
GeneratorTensor_2
<
InDataType
>
{
-
5
,
5
});
out
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
Tensor
<
OutDataType
>
out_ref
(
out
);
DeviceMem
in_dev
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
());
DeviceMem
out_dev
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
());
in_dev
.
ToDevice
(
in
.
mData
.
data
());
out_dev
.
ToDevice
(
out
.
mData
.
data
());
std
::
vector
<
index_t
>
i_in_lengths
(
in
.
mDesc
.
GetLengths
().
begin
(),
in
.
mDesc
.
GetLengths
().
end
());
std
::
vector
<
index_t
>
i_in_strides
(
in
.
mDesc
.
GetStrides
().
begin
(),
in
.
mDesc
.
GetStrides
().
end
());
auto
device_instance
=
DeviceInstance
{};
auto
argument_ptr
=
device_instance
.
MakeArgumentPointer
(
i_in_lengths
,
i_in_strides
,
reduce_dims
,
alpha
,
beta
,
in_dev
.
GetDeviceBuffer
(),
out_dev
.
GetDeviceBuffer
());
if
(
!
device_instance
.
IsSupportedArgument
(
argument_ptr
.
get
()))
{
FAIL
()
<<
"Unsupported argument"
;
}
auto
invoker_ptr
=
device_instance
.
MakeInvokerPointer
();
invoker_ptr
->
Run
(
argument_ptr
.
get
());
ref_instance_invoker_
.
Run
({
in
,
out_ref
,
alpha
,
beta
,
Rank
,
reduce_dims
});
out_dev
.
FromDevice
(
out
.
mData
.
data
());
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out
.
mData
,
out_ref
.
mData
));
}
void
Run
()
{
for
(
auto
in_length
:
this
->
in_lengths_
)
{
for
(
auto
scale
:
this
->
scales_
)
{
this
->
RunSingle
(
in_length
,
std
::
get
<
0
>
(
scale
),
std
::
get
<
1
>
(
scale
));
}
}
}
std
::
vector
<
std
::
vector
<
index_t
>>
in_lengths_
=
{{
1
,
8
,
128
},
{
2
,
128
,
1024
},
{
3
,
9
,
1032
}};
std
::
vector
<
std
::
tuple
<
AccDataType
,
AccDataType
>>
scales_
=
{{
1
,
0
},
{
2
,
2
},
{
0
,
1
}};
typename
ReferenceInstance
::
Invoker
ref_instance_invoker_
;
};
}
// namespace ck
test/space_filling_curve/space_filling_curve.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include <iostream>
#include <numeric>
#include <cassert>
#include "tensor_space_filling_curve.hpp"
#include "ck/ck.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
using
namespace
ck
;
...
...
Prev
1
…
26
27
28
29
30
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment