Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a1841d55
Commit
a1841d55
authored
Aug 01, 2022
by
Chao Liu
Browse files
Merge remote-tracking branch 'origin/develop' into lwpck-367
parents
127bf7f4
500fa995
Changes
373
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
443 additions
and
171 deletions
+443
-171
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
+270
-0
test/grouped_gemm/CMakeLists.txt
test/grouped_gemm/CMakeLists.txt
+1
-1
test/layernorm/CMakeLists.txt
test/layernorm/CMakeLists.txt
+5
-3
test/layernorm/test_layernorm_util.hpp
test/layernorm/test_layernorm_util.hpp
+6
-6
test/magic_number_division/CMakeLists.txt
test/magic_number_division/CMakeLists.txt
+1
-1
test/magic_number_division/magic_number_division.cpp
test/magic_number_division/magic_number_division.cpp
+3
-3
test/reduce/CMakeLists.txt
test/reduce/CMakeLists.txt
+2
-2
test/reduce/reduce_no_index.cpp
test/reduce/reduce_no_index.cpp
+1
-1
test/reduce/reduce_with_index.cpp
test/reduce/reduce_with_index.cpp
+1
-1
test/reference_conv_fwd/CMakeLists.txt
test/reference_conv_fwd/CMakeLists.txt
+1
-1
test/reference_conv_fwd/reference_conv_fwd.cpp
test/reference_conv_fwd/reference_conv_fwd.cpp
+144
-144
test/softmax/CMakeLists.txt
test/softmax/CMakeLists.txt
+4
-4
test/softmax/test_softmax_util.hpp
test/softmax/test_softmax_util.hpp
+4
-4
No files found.
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
0 → 100644
View file @
a1841d55
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/include/profile_grouped_conv_fwd_impl.hpp"
// Test fixture shared by the grouped N-d forward convolution tests.
// It only carries the list of convolution problem descriptions; each test
// fills the list and then iterates over it.
class TestGroupedConvNdFwd : public ::testing::Test
{
    protected:
    // Problem descriptions to run; populated by the individual tests.
    std::vector<ck::utils::conv::ConvParam> conv_params;
};
// 1d GNWC/GKXC/GNWK
// Runs the grouped forward-convolution profiler on three 1-d problems with the
// GNWC/GKXC/GNWK layouts, once per data type (fp32, fp16, bf16, int8), and
// expects every run to report success.
TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
{
    namespace layout = ck::tensor_layout::convolution;

    // NOTE(review): the fields appear to be {spatial dims, G, N, K, C, filter lengths,
    // input lengths, strides, dilations, left pads, right pads} -- confirm against ConvParam.
    conv_params.clear();
    conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
    conv_params.push_back({1, 2, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
    conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});

    for(auto& problem : conv_params)
    {
        // fp32
        const bool fp32_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<1,
                                                        layout::GNWC,
                                                        layout::GKXC,
                                                        layout::GNWK,
                                                        float,
                                                        float,
                                                        float>(/*do_verification=*/true,
                                                               /*init_method=*/1,
                                                               /*do_log=*/false,
                                                               /*time_kernel=*/false,
                                                               problem);
        EXPECT_TRUE(fp32_ok);

        // fp16
        const bool fp16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<1,
                                                        layout::GNWC,
                                                        layout::GKXC,
                                                        layout::GNWK,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t>(/*do_verification=*/true,
                                                                    /*init_method=*/1,
                                                                    /*do_log=*/false,
                                                                    /*time_kernel=*/false,
                                                                    problem);
        EXPECT_TRUE(fp16_ok);

        // bf16
        const bool bf16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<1,
                                                        layout::GNWC,
                                                        layout::GKXC,
                                                        layout::GNWK,
                                                        ck::bhalf_t,
                                                        ck::bhalf_t,
                                                        ck::bhalf_t>(/*do_verification=*/true,
                                                                     /*init_method=*/1,
                                                                     /*do_log=*/false,
                                                                     /*time_kernel=*/false,
                                                                     problem);
        EXPECT_TRUE(bf16_ok);

        // int8
        const bool int8_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<1,
                                                        layout::GNWC,
                                                        layout::GKXC,
                                                        layout::GNWK,
                                                        int8_t,
                                                        int8_t,
                                                        int8_t>(/*do_verification=*/true,
                                                                /*init_method=*/1,
                                                                /*do_log=*/false,
                                                                /*time_kernel=*/false,
                                                                problem);
        EXPECT_TRUE(int8_ok);
    }
}
// 2d GNHWC/GKYXC/GNHWK
// Runs the grouped forward-convolution profiler on three 2-d problems with the
// GNHWC/GKYXC/GNHWK layouts, once per data type (fp32, fp16, bf16, int8), and
// expects every run to report success.
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
{
    namespace layout = ck::tensor_layout::convolution;

    // NOTE(review): the fields appear to be {spatial dims, G, N, K, C, filter lengths,
    // input lengths, strides, dilations, left pads, right pads} -- confirm against ConvParam.
    conv_params.clear();
    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});

    for(auto& problem : conv_params)
    {
        // fp32
        const bool fp32_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<2,
                                                        layout::GNHWC,
                                                        layout::GKYXC,
                                                        layout::GNHWK,
                                                        float,
                                                        float,
                                                        float>(/*do_verification=*/true,
                                                               /*init_method=*/1,
                                                               /*do_log=*/false,
                                                               /*time_kernel=*/false,
                                                               problem);
        EXPECT_TRUE(fp32_ok);

        // fp16
        const bool fp16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<2,
                                                        layout::GNHWC,
                                                        layout::GKYXC,
                                                        layout::GNHWK,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t>(/*do_verification=*/true,
                                                                    /*init_method=*/1,
                                                                    /*do_log=*/false,
                                                                    /*time_kernel=*/false,
                                                                    problem);
        EXPECT_TRUE(fp16_ok);

        // bf16
        const bool bf16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<2,
                                                        layout::GNHWC,
                                                        layout::GKYXC,
                                                        layout::GNHWK,
                                                        ck::bhalf_t,
                                                        ck::bhalf_t,
                                                        ck::bhalf_t>(/*do_verification=*/true,
                                                                     /*init_method=*/1,
                                                                     /*do_log=*/false,
                                                                     /*time_kernel=*/false,
                                                                     problem);
        EXPECT_TRUE(bf16_ok);

        // int8
        const bool int8_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<2,
                                                        layout::GNHWC,
                                                        layout::GKYXC,
                                                        layout::GNHWK,
                                                        int8_t,
                                                        int8_t,
                                                        int8_t>(/*do_verification=*/true,
                                                                /*init_method=*/1,
                                                                /*do_log=*/false,
                                                                /*time_kernel=*/false,
                                                                problem);
        EXPECT_TRUE(int8_ok);
    }
}
// 3d GNDHWC/GKZYXC/GNDHWK
// Runs the grouped forward-convolution profiler on three 3-d problems with the
// GNDHWC/GKZYXC/GNDHWK layouts, once per data type (fp32, fp16, bf16, int8), and
// expects every run to report success.
TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
{
    namespace layout = ck::tensor_layout::convolution;

    // NOTE(review): the fields appear to be {spatial dims, G, N, K, C, filter lengths,
    // input lengths, strides, dilations, left pads, right pads} -- confirm against ConvParam.
    conv_params.clear();
    conv_params.push_back(
        {3, 2, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    conv_params.push_back(
        {3, 2, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    conv_params.push_back(
        {3, 2, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});

    for(auto& problem : conv_params)
    {
        // fp32
        const bool fp32_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<3,
                                                        layout::GNDHWC,
                                                        layout::GKZYXC,
                                                        layout::GNDHWK,
                                                        float,
                                                        float,
                                                        float>(/*do_verification=*/true,
                                                               /*init_method=*/1,
                                                               /*do_log=*/false,
                                                               /*time_kernel=*/false,
                                                               problem);
        EXPECT_TRUE(fp32_ok);

        // fp16
        const bool fp16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<3,
                                                        layout::GNDHWC,
                                                        layout::GKZYXC,
                                                        layout::GNDHWK,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t>(/*do_verification=*/true,
                                                                    /*init_method=*/1,
                                                                    /*do_log=*/false,
                                                                    /*time_kernel=*/false,
                                                                    problem);
        EXPECT_TRUE(fp16_ok);

        // bf16
        const bool bf16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<3,
                                                        layout::GNDHWC,
                                                        layout::GKZYXC,
                                                        layout::GNDHWK,
                                                        ck::bhalf_t,
                                                        ck::bhalf_t,
                                                        ck::bhalf_t>(/*do_verification=*/true,
                                                                     /*init_method=*/1,
                                                                     /*do_log=*/false,
                                                                     /*time_kernel=*/false,
                                                                     problem);
        EXPECT_TRUE(bf16_ok);

        // int8
        const bool int8_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<3,
                                                        layout::GNDHWC,
                                                        layout::GKZYXC,
                                                        layout::GNDHWK,
                                                        int8_t,
                                                        int8_t,
                                                        int8_t>(/*do_verification=*/true,
                                                                /*init_method=*/1,
                                                                /*do_log=*/false,
                                                                /*time_kernel=*/false,
                                                                problem);
        EXPECT_TRUE(int8_ok);
    }
}
// 2d NHWGC/KYXGC/NHWGK
// Runs the grouped forward-convolution profiler on three 2-d problems with the
// NHWGC/KYXGC/NHWGK layouts. Only fp16 is exercised here; every run is expected
// to report success.
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
{
    namespace layout = ck::tensor_layout::convolution;

    // NOTE(review): the fields appear to be {spatial dims, G, N, K, C, filter lengths,
    // input lengths, strides, dilations, left pads, right pads} -- confirm against ConvParam.
    conv_params.clear();
    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
    conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});

    for(auto& problem : conv_params)
    {
        // fp16
        const bool fp16_ok =
            ck::profiler::profile_grouped_conv_fwd_impl<2,
                                                        layout::NHWGC,
                                                        layout::KYXGC,
                                                        layout::NHWGK,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t>(/*do_verification=*/true,
                                                                    /*init_method=*/1,
                                                                    /*do_log=*/false,
                                                                    /*time_kernel=*/false,
                                                                    problem);
        EXPECT_TRUE(fp16_ok);
    }
}
test/grouped_gemm/CMakeLists.txt
View file @
a1841d55
add_test_executable
(
test_grouped_gemm_fp16 grouped_gemm_fp16.cpp
)
target_link_libraries
(
test_grouped_gemm_fp16 PRIVATE
host_tensor
)
target_link_libraries
(
test_grouped_gemm_fp16 PRIVATE
utility
)
target_link_libraries
(
test_grouped_gemm_fp16 PRIVATE device_grouped_gemm_instance
)
test/layernorm/CMakeLists.txt
View file @
a1841d55
...
...
@@ -2,7 +2,9 @@ add_custom_target(test_layernorm)
add_gtest_executable
(
test_layernorm_fp32 test_layernorm_fp32.cpp
)
add_gtest_executable
(
test_layernorm_fp16 test_layernorm_fp16.cpp
)
target_link_libraries
(
test_layernorm_fp32 PRIVATE host_tensor
)
target_link_libraries
(
test_layernorm_fp16 PRIVATE host_tensor
)
target_link_libraries
(
test_layernorm_fp32 PRIVATE utility
)
target_link_libraries
(
test_layernorm_fp16 PRIVATE utility
)
add_dependencies
(
test_layernorm test_layernorm_fp32
)
add_dependencies
(
test_layernorm test_layernorm_fp16
)
\ No newline at end of file
add_dependencies
(
test_layernorm test_layernorm_fp16
)
test/layernorm/test_layernorm_util.hpp
View file @
a1841d55
...
...
@@ -12,8 +12,8 @@
#include "ck/tensor_operation/gpu/device/device_layernorm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp"
namespace
ck
{
...
...
@@ -102,10 +102,10 @@ class TestLayernorm : public ::testing::Test
gamma
.
GenerateTensorValue
(
GeneratorTensor_3
<
GammaDataType
>
{
0.0
,
1.0
});
beta
.
GenerateTensorValue
(
GeneratorTensor_3
<
BetaDataType
>
{
0.0
,
1.0
});
DeviceMem
x_dev
(
sizeof
(
XDataType
)
*
x
.
mDesc
.
GetElementSpace
());
DeviceMem
gamma_dev
(
sizeof
(
GammaDataType
)
*
gamma
.
mDesc
.
GetElementSpace
());
DeviceMem
beta_dev
(
sizeof
(
BetaDataType
)
*
beta
.
mDesc
.
GetElementSpace
());
DeviceMem
y_dev
(
sizeof
(
YDataType
)
*
y
.
mDesc
.
GetElementSpace
());
DeviceMem
x_dev
(
sizeof
(
XDataType
)
*
x
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
gamma_dev
(
sizeof
(
GammaDataType
)
*
gamma
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
beta_dev
(
sizeof
(
BetaDataType
)
*
beta
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
y_dev
(
sizeof
(
YDataType
)
*
y
.
mDesc
.
GetElementSpace
Size
());
x_dev
.
ToDevice
(
x
.
mData
.
data
());
gamma_dev
.
ToDevice
(
gamma
.
mData
.
data
());
...
...
test/magic_number_division/CMakeLists.txt
View file @
a1841d55
add_test_executable
(
test_magic_number_division magic_number_division.cpp
)
target_link_libraries
(
test_magic_number_division PRIVATE
host_tensor
)
target_link_libraries
(
test_magic_number_division PRIVATE
utility
)
test/magic_number_division/magic_number_division.cpp
View file @
a1841d55
...
...
@@ -9,9 +9,9 @@
#include "ck/ck.hpp"
#include "ck/utility/magic_division.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/host_tensor_generator.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/host_tensor_generator.hpp"
__global__
void
gpu_magic_number_division
(
uint32_t
magic_multiplier
,
uint32_t
magic_shift
,
...
...
test/reduce/CMakeLists.txt
View file @
a1841d55
add_test_executable
(
test_reduce_no_index reduce_no_index.cpp
)
add_test_executable
(
test_reduce_with_index reduce_with_index.cpp
)
target_link_libraries
(
test_reduce_no_index PRIVATE
host_tensor
)
target_link_libraries
(
test_reduce_no_index PRIVATE
utility
)
target_link_libraries
(
test_reduce_no_index PRIVATE device_reduce_instance
)
target_link_libraries
(
test_reduce_with_index PRIVATE
host_tensor
)
target_link_libraries
(
test_reduce_with_index PRIVATE
utility
)
target_link_libraries
(
test_reduce_with_index PRIVATE device_reduce_instance
)
test/reduce/reduce_no_index.cpp
View file @
a1841d55
...
...
@@ -3,7 +3,7 @@
#include <getopt.h>
#include "ck/library/
host_tensor
/host_common_util.hpp"
#include "ck/library/
utility
/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
using
namespace
ck
;
...
...
test/reduce/reduce_with_index.cpp
View file @
a1841d55
...
...
@@ -3,7 +3,7 @@
#include <getopt.h>
#include "ck/library/
host_tensor
/host_common_util.hpp"
#include "ck/library/
utility
/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
using
namespace
ck
;
...
...
test/reference_conv_fwd/CMakeLists.txt
View file @
a1841d55
add_gtest_executable
(
test_reference_conv_fwd reference_conv_fwd.cpp
)
target_link_libraries
(
test_reference_conv_fwd PRIVATE
host_tensor conv_
util
)
target_link_libraries
(
test_reference_conv_fwd PRIVATE util
ity
)
test/reference_conv_fwd/reference_conv_fwd.cpp
View file @
a1841d55
...
...
@@ -13,74 +13,64 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/utility/fill.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
namespace
{
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
template
<
ck
::
index_t
NDim
,
template
<
ck
::
index_t
NDim
Spatial
,
typename
InDataType
=
float
,
typename
WeiDataType
=
float
,
typename
OutDataType
=
float
,
typename
InLayout
=
ck
::
tensor_layout
::
convolution
::
NHWC
,
typename
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
KYXC
,
typename
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NHWK
,
typename
InLayout
=
ck
::
tensor_layout
::
convolution
::
G
NHWC
,
typename
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
G
KYXC
,
typename
OutLayout
=
ck
::
tensor_layout
::
convolution
::
G
NHWK
,
typename
FillInputOp
=
ck
::
utils
::
FillMonotonicSeq
<
InDataType
>,
typename
FillWeightsOp
=
ck
::
utils
::
FillConstant
<
WeiDataType
>>
Tensor
<
OutDataType
>
run_reference_convolution_forward
(
const
ck
::
utils
::
conv
::
ConvParam
s
&
param
s
,
run_reference_convolution_forward
(
const
ck
::
utils
::
conv
::
ConvParam
&
conv_
param
,
const
FillInputOp
&
fill_input_op
=
FillInputOp
{},
const
FillWeightsOp
&
fill_weights_op
=
FillWeightsOp
{
0.5
f
})
{
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params
.
N_
),
static_cast
<
std
::
size_t
>
(
params
.
C_
)};
input_dims
.
insert
(
std
::
end
(
input_dims
),
std
::
begin
(
params
.
input_spatial_lengths_
),
std
::
end
(
params
.
input_spatial_lengths_
));
const
auto
in_g_n_c_wis_desc
=
ck
::
utils
::
conv
::
make_input_host_tensor_descriptor_g_n_c_wis_packed
<
InLayout
>
(
conv_param
);
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params
.
K_
),
static_cast
<
std
::
size_t
>
(
params
.
C_
)};
filter_dims
.
insert
(
std
::
end
(
filter_dims
),
std
::
begin
(
params
.
filter_spatial_lengths_
),
std
::
end
(
params
.
filter_spatial_lengths_
));
const
auto
wei_g_k_c_xs_desc
=
ck
::
utils
::
conv
::
make_weight_host_tensor_descriptor_g_k_c_xs_packed
<
WeiLayout
>
(
conv_param
);
const
std
::
vector
<
ck
::
index_t
>&
output_spatial_lengths
=
params
.
GetOutputSpatialLengths
();
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params
.
N_
),
static_cast
<
std
::
size_t
>
(
params
.
K_
)};
output_dims
.
insert
(
std
::
end
(
output_dims
),
std
::
begin
(
output_spatial_lengths
),
std
::
end
(
output_spatial_lengths
));
const
auto
out_g_n_k_wos_desc
=
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
conv_param
);
Tensor
<
InDataType
>
input
(
ck
::
utils
::
conv
::
get_host_tensor_descriptor
(
input_dims
,
InLayout
{}));
Tensor
<
WeiDataType
>
weights
(
ck
::
utils
::
conv
::
get_host_tensor_descriptor
(
filter_dims
,
WeiLayout
{}));
Tensor
<
OutDataType
>
host_output
(
ck
::
utils
::
conv
::
get_host_tensor_descriptor
(
output_dims
,
OutLayout
{}));
Tensor
<
InDataType
>
input
(
in_g_n_c_wis_desc
);
Tensor
<
WeiDataType
>
weights
(
wei_g_k_c_xs_desc
);
Tensor
<
OutDataType
>
host_output
(
out_g_n_k_wos_desc
);
fill_input_op
(
input
.
begin
(),
input
.
end
());
fill_weights_op
(
weights
.
begin
(),
weights
.
end
());
std
::
fill
(
host_output
.
begin
(),
host_output
.
end
(),
OutDataType
(
0.
f
));
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
InDataType
,
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
NDim
>
();
OutElementOp
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
input
,
weights
,
host_output
,
param
s
.
conv_filter_strides_
,
param
s
.
conv_filter_dilations_
,
param
s
.
input_left_pads_
,
param
s
.
input_right_pads_
,
conv_
param
.
conv_filter_strides_
,
conv_
param
.
conv_filter_dilations_
,
conv_
param
.
input_left_pads_
,
conv_
param
.
input_right_pads_
,
InElementOp
{},
WeiElementOp
{},
OutElementOp
{});
...
...
@@ -91,21 +81,29 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
}
// anonymous namespace
TEST
(
ReferenceConvolutionFWD
,
Conv2DNHWC
)
// Reference convolution assumes the dimensions of tensor descriptors are in GNCDHW/GKCZYX/GNKDHW
// order, regardless of the physical tensor layouts in memory.
// Some tests below assume the dimensions of tensor descriptors can be in a different order, and
// are therefore disabled.
// TODO: add more tests that comply with the reference convolution's assumption about dimension
// order, and add tests for more physical layouts.
#if 0
TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
{
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
N_
=
1
;
params
.
K_
=
1
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
6
,
6
};
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
};
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
};
ck::utils::conv::ConvParam conv_param(2,
1,
1,
1,
2,
std::vector<ck::index_t>{3, 3},
std::vector<ck::index_t>{6, 6},
std::vector<ck::index_t>{1, 1},
std::vector<ck::index_t>{1, 1},
std::vector<ck::index_t>{0, 0},
std::vector<ck::index_t>{0, 0});
auto
out_tensor
=
run_reference_convolution_forward
<
2
>
(
param
s
);
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
4
,
4
};
auto out_tensor = run_reference_convolution_forward<2>(
conv_
param);
std::vector<std::size_t> ref_dims{1, 1, 4, 4
, 1
};
std::vector<float> ref_data{130.5,
148.5,
166.5,
...
...
@@ -127,21 +125,22 @@ TEST(ReferenceConvolutionFWD, Conv2DNHWC)
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST
(
ReferenceConvolutionFWD
,
Conv2DNHWCStridesDilationsPadding
)
TEST(ReferenceConvolutionFWD, Conv2D
G
NHWCStridesDilationsPadding)
{
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
N_
=
1
;
params
.
K_
=
2
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
12
,
12
};
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
};
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
};
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
ck::utils::conv::ConvParam conv_param(2,
1,
1,
2,
2,
std::vector<ck::index_t>{3, 3},
std::vector<ck::index_t>{12, 12},
std::vector<ck::index_t>{2, 2},
std::vector<ck::index_t>{2, 2},
std::vector<ck::index_t>{1, 1},
std::vector<ck::index_t>{1, 1});
auto
out_tensor
=
run_reference_convolution_forward
<
2
>
(
param
s
);
std
::
vector
<
std
::
size_t
>
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
1
,
2
,
5
,
5
};
auto out_tensor = run_reference_convolution_forward<2>(
conv_
param);
std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1,
5
, 5,
2
};
std::vector<float> ref_data{
210., 210., 327., 327., 351., 351., 375., 375., 399., 399.,
459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5,
...
...
@@ -153,88 +152,88 @@ TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST
(
ReferenceConvolutionFWD
,
Conv1DNWC
)
TEST(ReferenceConvolutionFWD, Conv1D
G
NWC)
{
ck
::
utils
::
conv
::
ConvParam
s
param
s
;
params
.
num_dim_spatial_
=
1
;
params
.
N_
=
1
;
params
.
K_
=
1
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
}
;
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
6
}
;
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
1
}
;
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
}
;
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
}
;
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
};
ck::utils::conv::ConvParam
conv_
param
(1,
1
,
1,
1,
2,
std::vector<ck::index_t>{3}
,
std::vector<ck::index_t>{6}
,
std::vector<ck::index_t>{1}
,
std::vector<ck::index_t>{1}
,
std::vector<ck::index_t>{0}
,
std::vector<ck::index_t>{0}
)
;
auto out_tensor =
run_reference_convolution_forward<1,
float,
float,
float,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
param
s
);
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
4
};
ck::tensor_layout::convolution::
G
NWC,
ck::tensor_layout::convolution::
G
KXC,
ck::tensor_layout::convolution::
G
NWK>(
conv_
param);
std::vector<std::size_t> ref_dims{1, 1, 4
, 1
};
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST
(
ReferenceConvolutionFWD
,
Conv1DNWCStridesDilationsPadding
)
TEST(ReferenceConvolutionFWD, Conv1D
G
NWCStridesDilationsPadding)
{
ck
::
utils
::
conv
::
ConvParam
s
param
s
;
params
.
num_dim_spatial_
=
1
;
params
.
N_
=
1
;
params
.
K_
=
2
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
}
;
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
12
}
;
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
2
}
;
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
2
}
;
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
}
;
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
ck::utils::conv::ConvParam
conv_
param
(1,
1
,
1,
2,
2,
std::vector<ck::index_t>{3}
,
std::vector<ck::index_t>{12}
,
std::vector<ck::index_t>{2}
,
std::vector<ck::index_t>{2}
,
std::vector<ck::index_t>{1}
,
std::vector<ck::index_t>{1}
)
;
auto out_tensor =
run_reference_convolution_forward<1,
float,
float,
float,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
param
s
);
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
2
,
5
};
ck::tensor_layout::convolution::
G
NWC,
ck::tensor_layout::convolution::
G
KXC,
ck::tensor_layout::convolution::
G
NWK>(
conv_
param);
std::vector<std::size_t> ref_dims{1,
1
, 5
, 2
};
std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST
(
ReferenceConvolutionFWD
,
Conv1DNWCSameOutputSize
)
TEST(ReferenceConvolutionFWD, Conv1D
G
NWCSameOutputSize)
{
ck
::
utils
::
conv
::
ConvParam
s
param
s
;
params
.
num_dim_spatial_
=
1
;
params
.
N_
=
2
;
params
.
K_
=
16
;
params
.
C_
=
4
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
}
;
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
16
}
;
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
1
}
;
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
}
;
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
}
;
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
ck::utils::conv::ConvParam
conv_
param
(1,
1
,
2,
16
,
4,
std::vector<ck::index_t>{3}
,
std::vector<ck::index_t>{16}
,
std::vector<ck::index_t>{1}
,
std::vector<ck::index_t>{1}
,
std::vector<ck::index_t>{1}
,
std::vector<ck::index_t>{1}
)
;
auto out_tensor2 = run_reference_convolution_forward<1,
float,
float,
float,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
param
s
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
ck::tensor_layout::convolution::
G
NWC,
ck::tensor_layout::convolution::
G
KXC,
ck::tensor_layout::convolution::
G
NWK>(
conv_
param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std
::
vector
<
std
::
size_t
>
ref_dims
{
2
,
16
,
16
};
std::vector<std::size_t> ref_dims{
1,
2, 16, 16};
std::vector<float> ref_data{
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
...
...
@@ -304,30 +303,31 @@ TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
}
#endif
TEST
(
ReferenceConvolutionFWD
,
Conv3DNCDHW
)
TEST
(
ReferenceConvolutionFWD
,
Conv3D
G
NCDHW
)
{
ck
::
utils
::
conv
::
ConvParam
s
param
s
;
params
.
num_dim_spatial_
=
3
;
params
.
N_
=
1
;
params
.
K_
=
1
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
}
;
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
6
,
6
,
6
}
;
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
}
;
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
}
;
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
}
;
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
ck
::
utils
::
conv
::
ConvParam
conv_
param
(
3
,
1
,
1
,
1
,
2
,
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
}
,
std
::
vector
<
ck
::
index_t
>
{
6
,
6
,
6
}
,
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
}
,
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
}
,
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
}
,
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
}
)
;
auto
out_tensor
=
run_reference_convolution_forward
<
3
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NCDHW
,
ck
::
tensor_layout
::
convolution
::
KCZYX
,
ck
::
tensor_layout
::
convolution
::
NKDHW
>
(
param
s
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
4
,
4
,
4
};
ck
::
tensor_layout
::
convolution
::
G
NCDHW
,
ck
::
tensor_layout
::
convolution
::
G
KCZYX
,
ck
::
tensor_layout
::
convolution
::
G
NKDHW
>
(
conv_
param
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
1
,
4
,
4
,
4
};
std
::
vector
<
float
>
ref_data
{
407.7
,
410.40002
,
413.09998
,
415.80002
,
423.90002
,
426.6
,
429.30002
,
432.
,
440.1
,
442.80002
,
445.5
,
448.2
,
456.30002
,
459.
,
461.7
,
464.40002
,
...
...
@@ -344,29 +344,29 @@ TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 1]: incorrect results!"
));
}
TEST
(
ReferenceConvolutionFWD
,
Conv3DNCDHWStridesDilations
)
TEST
(
ReferenceConvolutionFWD
,
Conv3D
G
NCDHWStridesDilations
)
{
ck
::
utils
::
conv
::
ConvParam
s
param
s
;
params
.
num_dim_spatial_
=
3
;
params
.
N_
=
1
;
params
.
K_
=
2
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
}
;
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
12
,
12
,
12
}
;
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
}
;
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
}
;
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
}
;
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
ck
::
utils
::
conv
::
ConvParam
conv_
param
(
3
,
1
,
1
,
2
,
2
,
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
}
,
std
::
vector
<
ck
::
index_t
>
{
12
,
12
,
12
}
,
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
}
,
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
}
,
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
}
,
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
}
)
;
auto
out_tensor
=
run_reference_convolution_forward
<
3
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NCDHW
,
ck
::
tensor_layout
::
convolution
::
KCZYX
,
ck
::
tensor_layout
::
convolution
::
NKDHW
>
(
param
s
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
2
,
4
,
4
,
4
};
ck
::
tensor_layout
::
convolution
::
G
NCDHW
,
ck
::
tensor_layout
::
convolution
::
G
KCZYX
,
ck
::
tensor_layout
::
convolution
::
G
NKDHW
>
(
conv_
param
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
2
,
4
,
4
,
4
};
std
::
vector
<
float
>
ref_data
{
2756.7002
,
2764.7998
,
2772.9001
,
2781.
,
2853.9001
,
2862.
,
2870.1
,
2878.2002
,
2951.1
,
2959.2002
,
2967.2998
,
2975.4001
,
3048.2998
,
3056.4001
,
3064.5
,
3072.6
,
...
...
test/softmax/CMakeLists.txt
View file @
a1841d55
...
...
@@ -3,9 +3,9 @@ add_custom_target(test_softmax)
add_gtest_executable
(
test_softmax_fp32 test_softmax_fp32.cpp
)
add_gtest_executable
(
test_softmax_fp16 test_softmax_fp16.cpp
)
add_gtest_executable
(
test_softmax_int8 test_softmax_int8.cpp
)
target_link_libraries
(
test_softmax_fp32 PRIVATE
host_tensor
)
target_link_libraries
(
test_softmax_fp16 PRIVATE
host_tensor
)
target_link_libraries
(
test_softmax_int8 PRIVATE
host_tensor
)
target_link_libraries
(
test_softmax_fp32 PRIVATE
utility
)
target_link_libraries
(
test_softmax_fp16 PRIVATE
utility
)
target_link_libraries
(
test_softmax_int8 PRIVATE
utility
)
add_dependencies
(
test_softmax test_softmax_fp32
)
add_dependencies
(
test_softmax test_softmax_fp16
)
add_dependencies
(
test_softmax test_softmax_int8
)
\ No newline at end of file
add_dependencies
(
test_softmax test_softmax_int8
)
test/softmax/test_softmax_util.hpp
View file @
a1841d55
...
...
@@ -12,8 +12,8 @@
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/
host_tensor
/host_tensor.hpp"
#include "ck/library/
host_tensor
/device_memory.hpp"
#include "ck/library/
utility
/host_tensor.hpp"
#include "ck/library/
utility
/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
namespace
ck
{
...
...
@@ -80,8 +80,8 @@ class TestSoftmax : public ::testing::Test
Tensor
<
OutDataType
>
out_ref
(
out
);
DeviceMem
in_dev
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
());
DeviceMem
out_dev
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
());
DeviceMem
in_dev
(
sizeof
(
InDataType
)
*
in
.
mDesc
.
GetElementSpace
Size
());
DeviceMem
out_dev
(
sizeof
(
OutDataType
)
*
out
.
mDesc
.
GetElementSpace
Size
());
in_dev
.
ToDevice
(
in
.
mData
.
data
());
out_dev
.
ToDevice
(
out
.
mData
.
data
());
...
...
Prev
1
…
15
16
17
18
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment