Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b134b7d6
Commit
b134b7d6
authored
May 16, 2022
by
carlushuang
Browse files
Merge remote-tracking branch 'origin/develop' into cpu_avx2
parents
090ba885
9f71ff48
Changes
211
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
504 additions
and
519 deletions
+504
-519
test/gemm/gemm_fp32.cpp
test/gemm/gemm_fp32.cpp
+1
-1
test/gemm/gemm_int8.cpp
test/gemm/gemm_int8.cpp
+1
-1
test/gemm/gemm_util.hpp
test/gemm/gemm_util.hpp
+333
-340
test/gemm_reduce/gemm_reduce_fp16.cpp
test/gemm_reduce/gemm_reduce_fp16.cpp
+4
-4
test/gemm_split_k/gemm_split_k.cpp
test/gemm_split_k/gemm_split_k.cpp
+3
-2
test/grouped_gemm/grouped_gemm_fp16.cpp
test/grouped_gemm/grouped_gemm_fp16.cpp
+3
-3
test/reduce/reduce_no_index.cpp
test/reduce/reduce_no_index.cpp
+1
-1
test/reduce/reduce_util.hpp
test/reduce/reduce_util.hpp
+1
-1
test/reduce/reduce_with_index.cpp
test/reduce/reduce_with_index.cpp
+1
-1
test/reference_conv_fwd/CMakeLists.txt
test/reference_conv_fwd/CMakeLists.txt
+2
-2
test/reference_conv_fwd/reference_conv_fwd.cpp
test/reference_conv_fwd/reference_conv_fwd.cpp
+154
-163
No files found.
test/gemm/gemm_fp32.cpp
View file @
b134b7d6
...
...
@@ -15,7 +15,7 @@
#include "host_gemm.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp"
#include "device_gemm_xdl_c
_
shuffle.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
...
...
test/gemm/gemm_int8.cpp
View file @
b134b7d6
...
...
@@ -15,7 +15,7 @@
#include "host_gemm.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp"
#include "device_gemm_xdl_c
_
shuffle.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
...
...
test/gemm/gemm_util.hpp
View file @
b134b7d6
#ifndef GEMM_UTILS_HPP
#define GEMM_UTILS_HPP
#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "reference_gemm.hpp"
#include "tensor_layout.hpp"
namespace
ck
{
namespace
gemm_util
{
struct
GemmParams
{
GemmParams
()
:
M
(
1024
),
N
(
1024
),
K
(
1024
),
StrideA
(
1024
),
StrideB
(
1024
),
StrideC
(
1024
),
alpha
(
1
),
beta
(
0
)
{
}
ck
::
index_t
M
;
ck
::
index_t
N
;
ck
::
index_t
K
;
ck
::
index_t
StrideA
;
ck
::
index_t
StrideB
;
ck
::
index_t
StrideC
;
float
alpha
;
float
beta
;
};
template
<
typename
GemmInstance
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
void
RunHostGEMM
(
const
Tensor
<
ADataType
>&
A
,
const
Tensor
<
BDataType
>&
B
,
Tensor
<
CDataType
>&
C
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
CElementwiseOperation
c_element_op
)
{
auto
ref_gemm
=
GemmInstance
{};
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
auto
ref_argument
=
ref_gemm
.
MakeArgument
(
A
,
B
,
C
,
a_element_op
,
b_element_op
,
c_element_op
);
ref_invoker
.
Run
(
ref_argument
);
}
template
<
typename
DeviceGemmPtr_
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
void
RunDeviceGEMM
(
DeviceGemmPtr_
&
gemmPtr
,
const
ck
::
gemm_util
::
GemmParams
&
params
,
const
Tensor
<
ADataType
>&
A
,
const
Tensor
<
BDataType
>&
B
,
Tensor
<
CDataType
>&
C
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
CElementwiseOperation
c_element_op
)
{
DeviceMem
a_m_k_device_buf
(
sizeof
(
ADataType
)
*
A
.
mDesc
.
GetElementSpace
());
DeviceMem
b_k_n_device_buf
(
sizeof
(
BDataType
)
*
B
.
mDesc
.
GetElementSpace
());
DeviceMem
c_m_n_device_buf
(
sizeof
(
CDataType
)
*
C
.
mDesc
.
GetElementSpace
());
a_m_k_device_buf
.
ToDevice
(
A
.
mData
.
data
());
b_k_n_device_buf
.
ToDevice
(
B
.
mData
.
data
());
auto
invoker_ptr
=
gemmPtr
->
MakeInvokerPointer
();
auto
argument_ptr
=
gemmPtr
->
MakeArgumentPointer
(
static_cast
<
ADataType
*>
(
a_m_k_device_buf
.
GetDeviceBuffer
()),
static_cast
<
BDataType
*>
(
b_k_n_device_buf
.
GetDeviceBuffer
()),
static_cast
<
CDataType
*>
(
c_m_n_device_buf
.
GetDeviceBuffer
()),
params
.
M
,
params
.
N
,
params
.
K
,
params
.
StrideA
,
params
.
StrideB
,
params
.
StrideC
,
a_element_op
,
b_element_op
,
c_element_op
);
if
(
!
gemmPtr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
throw
std
::
runtime_error
(
"wrong! device_gemm with the specified compilation parameters does "
"not support this GEMM problem"
);
}
invoker_ptr
->
Run
(
argument_ptr
.
get
());
c_m_n_device_buf
.
FromDevice
(
C
.
mData
.
data
());
}
template
<
typename
DeviceGemmPtr_
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
typename
ALayout
,
typename
BLayout
,
typename
CLayout
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
struct
TestGemm
{
auto
PrepareGemmTensor
(
const
ck
::
gemm_util
::
GemmParams
&
params
)
{
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
std
::
is_same
<
decltype
(
layout
),
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
stride
,
1
}));
}
else
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
1
,
stride
}));
}
};
Tensor
<
ADataType
>
a_m_k
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
K
,
params
.
StrideA
,
ALayout
{}));
Tensor
<
BDataType
>
b_k_n
(
f_host_tensor_descriptor
(
params
.
K
,
params
.
N
,
params
.
StrideB
,
BLayout
{}));
Tensor
<
CDataType
>
c_m_n_host_result
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
Tensor
<
CDataType
>
c_m_n_device_result
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
auto
f_generate_tensor_value
=
[](
auto
desc
,
auto
type
)
{
using
dataType
=
decltype
(
type
);
if
(
std
::
is_same
<
dataType
,
int8_t
>::
value
)
{
desc
.
GenerateTensorValue
(
GeneratorTensor_2
<
int8_t
>
{
-
5
,
5
});
}
else
{
desc
.
GenerateTensorValue
(
GeneratorTensor_3
<
dataType
>
{
-
0.5
,
0.5
});
}
};
f_generate_tensor_value
(
a_m_k
,
ADataType
{});
f_generate_tensor_value
(
b_k_n
,
BDataType
{});
return
std
::
make_tuple
(
a_m_k
,
b_k_n
,
c_m_n_host_result
,
c_m_n_device_result
);
}
auto
operator
()(
DeviceGemmPtr_
&
gemmPtr
)
{
std
::
cout
<<
"ALayout = "
<<
ALayout
{}.
name
<<
", BLayout = "
<<
BLayout
{}.
name
<<
", CLayout = "
<<
CLayout
{}.
name
<<
std
::
endl
;
std
::
cout
<<
gemmPtr
->
GetTypeString
()
<<
std
::
endl
;
// Arrange
ck
::
gemm_util
::
GemmParams
params
;
params
.
M
=
1024
;
params
.
N
=
1024
;
params
.
K
=
1024
;
params
.
StrideA
=
1024
;
params
.
StrideB
=
1024
;
params
.
StrideC
=
1024
;
auto
host_tensors
=
PrepareGemmTensor
(
params
);
const
Tensor
<
ADataType
>&
a
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
BDataType
>&
b
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
CDataType
>&
c_host
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
CDataType
>&
c_device
=
std
::
get
<
3
>
(
host_tensors
);
auto
a_element_op
=
AElementwiseOperation
{};
auto
b_element_op
=
BElementwiseOperation
{};
auto
c_element_op
=
CElementwiseOperation
{};
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
>
;
ck
::
gemm_util
::
RunHostGEMM
<
ReferenceGemmInstance
>
(
a
,
b
,
c_host
,
a_element_op
,
b_element_op
,
c_element_op
);
// Act
ck
::
gemm_util
::
RunDeviceGEMM
(
gemmPtr
,
params
,
a
,
b
,
c_device
,
a_element_op
,
b_element_op
,
c_element_op
);
// Assert
bool
res
=
false
;
if
(
std
::
is_same
<
CDataType
,
float
>::
value
)
{
res
=
ck
::
utils
::
check_err
(
c_device
.
mData
,
c_host
.
mData
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
else
if
(
std
::
is_same
<
CDataType
,
ck
::
half_t
>::
value
)
{
res
=
ck
::
utils
::
check_err
(
c_device
.
mData
,
c_host
.
mData
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
else
if
(
std
::
is_same
<
CDataType
,
int8_t
>::
value
)
{
res
=
ck
::
utils
::
check_err
(
c_device
.
mData
,
c_host
.
mData
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
return
res
;
}
};
template
<
typename
DeviceGemmPtr_
,
typename
ALayout
,
typename
BLayout
,
typename
CLayout
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
struct
TestGemmBF16
{
using
BF16
=
ck
::
bhalf_t
;
auto
PrepareGemmTensorBF16
(
const
ck
::
gemm_util
::
GemmParams
&
params
)
{
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
std
::
is_same
<
decltype
(
layout
),
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
stride
,
1
}));
}
else
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
1
,
stride
}));
}
};
// use fp32 host kernel to verify bf16 device kernel
Tensor
<
BF16
>
a_m_k_bf16
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
K
,
params
.
StrideA
,
ALayout
{}));
Tensor
<
BF16
>
b_k_n_bf16
(
f_host_tensor_descriptor
(
params
.
K
,
params
.
N
,
params
.
StrideB
,
BLayout
{}));
Tensor
<
BF16
>
c_m_n_device_bf16
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
Tensor
<
float
>
a_m_k_fp32
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
K
,
params
.
StrideA
,
ALayout
{}));
Tensor
<
float
>
b_k_n_fp32
(
f_host_tensor_descriptor
(
params
.
K
,
params
.
N
,
params
.
StrideB
,
BLayout
{}));
Tensor
<
float
>
c_m_n_host_fp32
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
Tensor
<
float
>
c_m_n_device_fp32
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
a_m_k_bf16
.
GenerateTensorValue
(
GeneratorTensor_3
<
BF16
>
{
-
0.5
,
0.5
});
b_k_n_bf16
.
GenerateTensorValue
(
GeneratorTensor_3
<
BF16
>
{
-
0.5
,
0.5
});
bf16_to_f32_
(
a_m_k_bf16
,
a_m_k_fp32
);
bf16_to_f32_
(
b_k_n_bf16
,
b_k_n_fp32
);
return
std
::
make_tuple
(
a_m_k_bf16
,
b_k_n_bf16
,
c_m_n_device_bf16
,
a_m_k_fp32
,
b_k_n_fp32
,
c_m_n_host_fp32
,
c_m_n_device_fp32
);
}
auto
operator
()(
DeviceGemmPtr_
&
gemmPtr
)
{
// Arrange
ck
::
gemm_util
::
GemmParams
params
;
params
.
M
=
1024
;
params
.
N
=
1024
;
params
.
K
=
1024
;
params
.
StrideA
=
1024
;
params
.
StrideB
=
1024
;
params
.
StrideC
=
1024
;
auto
host_tensors
=
PrepareGemmTensorBF16
(
params
);
const
Tensor
<
BF16
>&
a_bf16
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
BF16
>&
b_bf16
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
BF16
>&
c_device_bf16
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
float
>&
a_fp32
=
std
::
get
<
3
>
(
host_tensors
);
Tensor
<
float
>&
b_fp32
=
std
::
get
<
4
>
(
host_tensors
);
Tensor
<
float
>&
c_host_fp32
=
std
::
get
<
5
>
(
host_tensors
);
Tensor
<
float
>&
c_device_fp32
=
std
::
get
<
6
>
(
host_tensors
);
auto
a_element_op
=
AElementwiseOperation
{};
auto
b_element_op
=
BElementwiseOperation
{};
auto
c_element_op
=
CElementwiseOperation
{};
// use fp32 host kernel to verify bf16 device kernel
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
float
,
float
,
float
,
AElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
>
;
ck
::
gemm_util
::
RunHostGEMM
<
ReferenceGemmInstance
>
(
a_fp32
,
b_fp32
,
c_host_fp32
,
a_element_op
,
b_element_op
,
c_element_op
);
// Act
ck
::
gemm_util
::
RunDeviceGEMM
(
gemmPtr
,
params
,
a_bf16
,
b_bf16
,
c_device_bf16
,
a_element_op
,
b_element_op
,
c_element_op
);
bf16_to_f32_
(
c_device_bf16
,
c_device_fp32
);
// Assert
bool
res
=
ck
::
utils
::
check_err
(
c_device_fp32
.
mData
,
c_host_fp32
.
mData
,
"Error: incorrect results!"
,
1e-2
f
,
1e-3
f
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
return
res
;
};
};
}
// namespace gemm_util
}
// namespace ck
#endif
#ifndef GEMM_UTILS_HPP
#define GEMM_UTILS_HPP
#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "reference_gemm.hpp"
#include "tensor_layout.hpp"
namespace
ck
{
namespace
gemm_util
{
struct
GemmParams
{
GemmParams
()
:
M
(
1024
),
N
(
1024
),
K
(
1024
),
StrideA
(
1024
),
StrideB
(
1024
),
StrideC
(
1024
),
alpha
(
1
),
beta
(
0
)
{
}
ck
::
index_t
M
;
ck
::
index_t
N
;
ck
::
index_t
K
;
ck
::
index_t
StrideA
;
ck
::
index_t
StrideB
;
ck
::
index_t
StrideC
;
float
alpha
;
float
beta
;
};
template
<
typename
GemmInstance
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
void
RunHostGEMM
(
const
Tensor
<
ADataType
>&
A
,
const
Tensor
<
BDataType
>&
B
,
Tensor
<
CDataType
>&
C
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
CElementwiseOperation
c_element_op
)
{
auto
ref_gemm
=
GemmInstance
{};
auto
ref_invoker
=
ref_gemm
.
MakeInvoker
();
auto
ref_argument
=
ref_gemm
.
MakeArgument
(
A
,
B
,
C
,
a_element_op
,
b_element_op
,
c_element_op
);
ref_invoker
.
Run
(
ref_argument
);
}
template
<
typename
DeviceGemmPtr_
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
void
RunDeviceGEMM
(
DeviceGemmPtr_
&
gemmPtr
,
const
ck
::
gemm_util
::
GemmParams
&
params
,
const
Tensor
<
ADataType
>&
A
,
const
Tensor
<
BDataType
>&
B
,
Tensor
<
CDataType
>&
C
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
CElementwiseOperation
c_element_op
)
{
DeviceMem
a_m_k_device_buf
(
sizeof
(
ADataType
)
*
A
.
mDesc
.
GetElementSpace
());
DeviceMem
b_k_n_device_buf
(
sizeof
(
BDataType
)
*
B
.
mDesc
.
GetElementSpace
());
DeviceMem
c_m_n_device_buf
(
sizeof
(
CDataType
)
*
C
.
mDesc
.
GetElementSpace
());
a_m_k_device_buf
.
ToDevice
(
A
.
mData
.
data
());
b_k_n_device_buf
.
ToDevice
(
B
.
mData
.
data
());
auto
invoker_ptr
=
gemmPtr
->
MakeInvokerPointer
();
auto
argument_ptr
=
gemmPtr
->
MakeArgumentPointer
(
static_cast
<
ADataType
*>
(
a_m_k_device_buf
.
GetDeviceBuffer
()),
static_cast
<
BDataType
*>
(
b_k_n_device_buf
.
GetDeviceBuffer
()),
static_cast
<
CDataType
*>
(
c_m_n_device_buf
.
GetDeviceBuffer
()),
params
.
M
,
params
.
N
,
params
.
K
,
params
.
StrideA
,
params
.
StrideB
,
params
.
StrideC
,
a_element_op
,
b_element_op
,
c_element_op
);
if
(
!
gemmPtr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
throw
std
::
runtime_error
(
"wrong! device_gemm with the specified compilation parameters does "
"not support this GEMM problem"
);
}
invoker_ptr
->
Run
(
argument_ptr
.
get
());
c_m_n_device_buf
.
FromDevice
(
C
.
mData
.
data
());
}
template
<
typename
DeviceGemmPtr_
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
typename
ALayout
,
typename
BLayout
,
typename
CLayout
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
struct
TestGemm
{
auto
PrepareGemmTensor
(
const
ck
::
gemm_util
::
GemmParams
&
params
)
{
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
std
::
is_same
<
decltype
(
layout
),
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
stride
,
1
}));
}
else
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
1
,
stride
}));
}
};
Tensor
<
ADataType
>
a_m_k
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
K
,
params
.
StrideA
,
ALayout
{}));
Tensor
<
BDataType
>
b_k_n
(
f_host_tensor_descriptor
(
params
.
K
,
params
.
N
,
params
.
StrideB
,
BLayout
{}));
Tensor
<
CDataType
>
c_m_n_host_result
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
Tensor
<
CDataType
>
c_m_n_device_result
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
auto
f_generate_tensor_value
=
[](
auto
&
tensor
,
auto
type
)
{
using
dataType
=
decltype
(
type
);
tensor
.
GenerateTensorValue
(
GeneratorTensor_2
<
dataType
>
{
-
5
,
5
});
};
f_generate_tensor_value
(
a_m_k
,
ADataType
{});
f_generate_tensor_value
(
b_k_n
,
BDataType
{});
return
std
::
make_tuple
(
a_m_k
,
b_k_n
,
c_m_n_host_result
,
c_m_n_device_result
);
}
auto
operator
()(
DeviceGemmPtr_
&
gemmPtr
)
{
std
::
cout
<<
"ALayout = "
<<
ALayout
{}.
name
<<
", BLayout = "
<<
BLayout
{}.
name
<<
", CLayout = "
<<
CLayout
{}.
name
<<
std
::
endl
;
std
::
cout
<<
gemmPtr
->
GetTypeString
()
<<
std
::
endl
;
// Arrange
ck
::
gemm_util
::
GemmParams
params
;
params
.
M
=
1024
;
params
.
N
=
1024
;
params
.
K
=
1024
;
params
.
StrideA
=
1024
;
params
.
StrideB
=
1024
;
params
.
StrideC
=
1024
;
auto
host_tensors
=
PrepareGemmTensor
(
params
);
const
Tensor
<
ADataType
>&
a
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
BDataType
>&
b
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
CDataType
>&
c_host
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
CDataType
>&
c_device
=
std
::
get
<
3
>
(
host_tensors
);
auto
a_element_op
=
AElementwiseOperation
{};
auto
b_element_op
=
BElementwiseOperation
{};
auto
c_element_op
=
CElementwiseOperation
{};
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
AElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
>
;
ck
::
gemm_util
::
RunHostGEMM
<
ReferenceGemmInstance
>
(
a
,
b
,
c_host
,
a_element_op
,
b_element_op
,
c_element_op
);
// Act
ck
::
gemm_util
::
RunDeviceGEMM
(
gemmPtr
,
params
,
a
,
b
,
c_device
,
a_element_op
,
b_element_op
,
c_element_op
);
// Assert
bool
res
=
false
;
if
(
std
::
is_same
<
CDataType
,
float
>::
value
)
{
res
=
ck
::
utils
::
check_err
(
c_device
.
mData
,
c_host
.
mData
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
else
if
(
std
::
is_same
<
CDataType
,
ck
::
half_t
>::
value
)
{
res
=
ck
::
utils
::
check_err
(
c_device
.
mData
,
c_host
.
mData
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
else
if
(
std
::
is_same
<
CDataType
,
int8_t
>::
value
)
{
res
=
ck
::
utils
::
check_err
(
c_device
.
mData
,
c_host
.
mData
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
}
return
res
;
}
};
template
<
typename
DeviceGemmPtr_
,
typename
ALayout
,
typename
BLayout
,
typename
CLayout
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CElementwiseOperation
>
struct
TestGemmBF16
{
using
BF16
=
ck
::
bhalf_t
;
auto
PrepareGemmTensorBF16
(
const
ck
::
gemm_util
::
GemmParams
&
params
)
{
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
std
::
is_same
<
decltype
(
layout
),
ck
::
tensor_layout
::
gemm
::
RowMajor
>::
value
)
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
stride
,
1
}));
}
else
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
row
,
col
}),
std
::
vector
<
std
::
size_t
>
({
1
,
stride
}));
}
};
// use fp32 host kernel to verify bf16 device kernel
Tensor
<
BF16
>
a_m_k_bf16
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
K
,
params
.
StrideA
,
ALayout
{}));
Tensor
<
BF16
>
b_k_n_bf16
(
f_host_tensor_descriptor
(
params
.
K
,
params
.
N
,
params
.
StrideB
,
BLayout
{}));
Tensor
<
BF16
>
c_m_n_device_bf16
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
Tensor
<
float
>
a_m_k_fp32
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
K
,
params
.
StrideA
,
ALayout
{}));
Tensor
<
float
>
b_k_n_fp32
(
f_host_tensor_descriptor
(
params
.
K
,
params
.
N
,
params
.
StrideB
,
BLayout
{}));
Tensor
<
float
>
c_m_n_host_fp32
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
Tensor
<
float
>
c_m_n_device_fp32
(
f_host_tensor_descriptor
(
params
.
M
,
params
.
N
,
params
.
StrideC
,
CLayout
{}));
a_m_k_bf16
.
GenerateTensorValue
(
GeneratorTensor_3
<
BF16
>
{
-
0.5
,
0.5
});
b_k_n_bf16
.
GenerateTensorValue
(
GeneratorTensor_3
<
BF16
>
{
-
0.5
,
0.5
});
bf16_to_f32_
(
a_m_k_bf16
,
a_m_k_fp32
);
bf16_to_f32_
(
b_k_n_bf16
,
b_k_n_fp32
);
return
std
::
make_tuple
(
a_m_k_bf16
,
b_k_n_bf16
,
c_m_n_device_bf16
,
a_m_k_fp32
,
b_k_n_fp32
,
c_m_n_host_fp32
,
c_m_n_device_fp32
);
}
auto
operator
()(
DeviceGemmPtr_
&
gemmPtr
)
{
// Arrange
ck
::
gemm_util
::
GemmParams
params
;
params
.
M
=
1024
;
params
.
N
=
1024
;
params
.
K
=
1024
;
params
.
StrideA
=
1024
;
params
.
StrideB
=
1024
;
params
.
StrideC
=
1024
;
auto
host_tensors
=
PrepareGemmTensorBF16
(
params
);
const
Tensor
<
BF16
>&
a_bf16
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
BF16
>&
b_bf16
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
BF16
>&
c_device_bf16
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
float
>&
a_fp32
=
std
::
get
<
3
>
(
host_tensors
);
Tensor
<
float
>&
b_fp32
=
std
::
get
<
4
>
(
host_tensors
);
Tensor
<
float
>&
c_host_fp32
=
std
::
get
<
5
>
(
host_tensors
);
Tensor
<
float
>&
c_device_fp32
=
std
::
get
<
6
>
(
host_tensors
);
auto
a_element_op
=
AElementwiseOperation
{};
auto
b_element_op
=
BElementwiseOperation
{};
auto
c_element_op
=
CElementwiseOperation
{};
// use fp32 host kernel to verify bf16 device kernel
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
float
,
float
,
float
,
AElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
>
;
ck
::
gemm_util
::
RunHostGEMM
<
ReferenceGemmInstance
>
(
a_fp32
,
b_fp32
,
c_host_fp32
,
a_element_op
,
b_element_op
,
c_element_op
);
// Act
ck
::
gemm_util
::
RunDeviceGEMM
(
gemmPtr
,
params
,
a_bf16
,
b_bf16
,
c_device_bf16
,
a_element_op
,
b_element_op
,
c_element_op
);
bf16_to_f32_
(
c_device_bf16
,
c_device_fp32
);
// Assert
bool
res
=
ck
::
utils
::
check_err
(
c_device_fp32
.
mData
,
c_host_fp32
.
mData
,
"Error: incorrect results!"
,
1e-2
f
,
1e-3
f
);
std
::
cout
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
return
res
;
};
};
}
// namespace gemm_util
}
// namespace ck
#endif
test/gemm_reduce/gemm_reduce_fp16.cpp
View file @
b134b7d6
...
...
@@ -16,22 +16,22 @@ int main()
pass
=
pass
&&
ck
::
profiler
::
profile_gemm_reduce_impl
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
float
,
Row
,
Row
,
Row
>
(
true
,
1
,
false
,
1
,
M
,
N
,
K
,
K
,
N
,
N
);
true
,
1
,
false
,
false
,
M
,
N
,
K
,
K
,
N
,
N
);
pass
=
pass
&&
ck
::
profiler
::
profile_gemm_reduce_impl
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
float
,
Row
,
Col
,
Row
>
(
true
,
1
,
false
,
1
,
M
,
N
,
K
,
K
,
K
,
N
);
true
,
1
,
false
,
false
,
M
,
N
,
K
,
K
,
K
,
N
);
pass
=
pass
&&
ck
::
profiler
::
profile_gemm_reduce_impl
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
float
,
Col
,
Row
,
Row
>
(
true
,
1
,
false
,
1
,
M
,
N
,
K
,
M
,
N
,
N
);
true
,
1
,
false
,
false
,
M
,
N
,
K
,
M
,
N
,
N
);
pass
=
pass
&&
ck
::
profiler
::
profile_gemm_reduce_impl
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
float
,
Col
,
Col
,
Row
>
(
true
,
1
,
false
,
1
,
M
,
N
,
K
,
M
,
K
,
N
);
true
,
1
,
false
,
false
,
M
,
N
,
K
,
M
,
K
,
N
);
if
(
pass
)
{
...
...
test/gemm_split_k/gemm_split_k.cpp
View file @
b134b7d6
...
...
@@ -45,7 +45,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
{
float
max_diff
=
1e-6
;
for
(
in
t
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
for
(
std
::
size_
t
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
{
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
if
(
max_diff
<
diff
)
...
...
@@ -187,9 +187,10 @@ int test_gemm(const gemmArgs& args)
if
(
gemm_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
invoker_ptr
->
Run
(
argument_ptr
.
get
()
,
0
);
invoker_ptr
->
Run
(
argument_ptr
.
get
());
c_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
if
(
!
check_out
(
c_m_n_host_result
,
c_m_n_device_result
))
{
success
=
false
;
...
...
test/grouped_gemm/grouped_gemm_fp16.cpp
View file @
b134b7d6
...
...
@@ -104,7 +104,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
b_tensors_device
.
reserve
(
group_count
);
c_tensors_device
.
reserve
(
group_count
);
for
(
in
t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
std
::
size_
t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
{
a_tensors
.
emplace_back
(
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
gemm_shapes
[
i
].
M
,
gemm_shapes
[
i
].
K
,
gemm_shapes
[
i
].
StrideA
,
ALayout
{})));
...
...
@@ -119,7 +119,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_2
<
BDataType
>
{
-
5
,
5
});
}
for
(
in
t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
std
::
size_
t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
{
a_tensors_device
.
emplace_back
(
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_tensors
[
i
].
mDesc
.
GetElementSize
()));
...
...
@@ -147,7 +147,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
invoker_ptr
->
Run
(
argument_ptr
.
get
());
for
(
in
t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
std
::
size_
t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
{
c_tensors_device
[
i
]
->
FromDevice
(
c_device_tensors
[
i
].
mData
.
data
());
...
...
test/reduce/reduce_no_index.cpp
View file @
b134b7d6
...
...
@@ -460,7 +460,7 @@ class SimpleAppArgs
int
processArgs
(
int
argc
,
char
*
argv
[])
{
unsigned
int
ch
;
int
ch
;
while
(
1
)
{
...
...
test/reduce/reduce_util.hpp
View file @
b134b7d6
...
...
@@ -9,7 +9,7 @@ namespace reduce_util {
template
<
typename
T
>
void
to_f32_vector
(
const
Tensor
<
T
>&
src
,
Tensor
<
float
>&
dst
)
{
for
(
in
t
i
=
0
;
i
<
src
.
mData
.
size
();
++
i
)
for
(
std
::
size_
t
i
=
0
;
i
<
src
.
mData
.
size
();
++
i
)
dst
.
mData
[
i
]
=
type_convert
<
float
>
(
src
.
mData
[
i
]);
}
...
...
test/reduce/reduce_with_index.cpp
View file @
b134b7d6
...
...
@@ -463,7 +463,7 @@ class SimpleAppArgs
int
processArgs
(
int
argc
,
char
*
argv
[])
{
unsigned
int
ch
;
int
ch
;
while
(
1
)
{
...
...
test/reference_conv_fwd/CMakeLists.txt
View file @
b134b7d6
add_test_executable
(
test_reference_conv_fwd reference_conv_fwd.cpp
)
target_link_libraries
(
test_reference_conv_fwd PRIVATE host_tensor conv_
fwd_
util
)
add_
g
test_executable
(
test_reference_conv_fwd reference_conv_fwd.cpp
)
target_link_libraries
(
test_reference_conv_fwd PRIVATE host_tensor conv_util
)
test/reference_conv_fwd/reference_conv_fwd.cpp
View file @
b134b7d6
...
...
@@ -4,10 +4,11 @@
#include <numeric>
#include <type_traits>
#include <vector>
#include "gtest/gtest.h"
#include "check_err.hpp"
#include "config.hpp"
#include "conv_
fwd_
util.hpp"
#include "conv_util.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "host_tensor.hpp"
...
...
@@ -33,21 +34,21 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
const
FillInputOp
&
fill_input_op
=
FillInputOp
{},
const
FillWeightsOp
&
fill_weights_op
=
FillWeightsOp
{
0.5
f
})
{
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params
.
N
),
static_cast
<
std
::
size_t
>
(
params
.
C
)};
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params
.
N
_
),
static_cast
<
std
::
size_t
>
(
params
.
C
_
)};
input_dims
.
insert
(
std
::
end
(
input_dims
),
std
::
begin
(
params
.
input_spatial_lengths
),
std
::
end
(
params
.
input_spatial_lengths
));
std
::
begin
(
params
.
input_spatial_lengths
_
),
std
::
end
(
params
.
input_spatial_lengths
_
));
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params
.
K
),
static_cast
<
std
::
size_t
>
(
params
.
C
)};
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params
.
K
_
),
static_cast
<
std
::
size_t
>
(
params
.
C
_
)};
filter_dims
.
insert
(
std
::
end
(
filter_dims
),
std
::
begin
(
params
.
filter_spatial_lengths
),
std
::
end
(
params
.
filter_spatial_lengths
));
std
::
begin
(
params
.
filter_spatial_lengths
_
),
std
::
end
(
params
.
filter_spatial_lengths
_
));
const
std
::
vector
<
ck
::
index_t
>&
output_spatial_lengths
=
params
.
GetOutputSpatialLengths
();
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params
.
N
),
static_cast
<
std
::
size_t
>
(
params
.
K
)};
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params
.
N
_
),
static_cast
<
std
::
size_t
>
(
params
.
K
_
)};
output_dims
.
insert
(
std
::
end
(
output_dims
),
std
::
begin
(
output_spatial_lengths
),
std
::
end
(
output_spatial_lengths
));
...
...
@@ -73,32 +74,32 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
input
,
weights
,
host_output
,
params
.
conv_filter_strides
,
params
.
conv_filter_dilations
,
params
.
input_left_pads
,
params
.
input_right_pads
,
params
.
conv_filter_strides
_
,
params
.
conv_filter_dilations
_
,
params
.
input_left_pads
_
,
params
.
input_right_pads
_
,
InElementOp
{},
WeiElementOp
{},
OutElementOp
{});
ref_invoker
.
Run
(
ref_argument
);
// std::cout <<"output: " << host_output.mDesc << std::endl << host_output.mData << std::endl;
return
host_output
;
}
bool
test_conv2d_nhwc
()
}
// anonymous namespace
TEST
(
ReferenceConvolutionFWD
,
Conv2DNHWC
)
{
bool
res
{
true
};
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
N
=
1
;
params
.
K
=
1
;
params
.
C
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
6
,
6
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
};
params
.
N
_
=
1
;
params
.
K
_
=
1
;
params
.
C
_
=
2
;
params
.
filter_spatial_lengths
_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths
_
=
std
::
vector
<
ck
::
index_t
>
{
6
,
6
};
params
.
conv_filter_strides
_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
conv_filter_dilations
_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_left_pads
_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
};
params
.
input_right_pads
_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
};
auto
out_tensor
=
run_reference_convolution_forward
<
2
>
(
params
);
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
4
,
4
};
...
...
@@ -118,51 +119,50 @@ bool test_conv2d_nhwc()
472.5
,
490.5
,
508.5
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimension
s!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect results!"
);
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect result
s!"
)
)
;
}
params
.
N
=
1
;
params
.
K
=
2
;
params
.
C
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
12
,
12
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
TEST
(
ReferenceConvolutionFWD
,
Conv2DNHWCStridesDilationsPadding
)
{
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
N_
=
1
;
params
.
K_
=
2
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
12
,
12
};
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
};
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
};
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
out_tensor
=
run_reference_convolution_forward
<
2
>
(
params
);
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
1
,
2
,
5
,
5
};
ref_data
=
std
::
vector
<
float
>
{
auto
out_tensor
=
run_reference_convolution_forward
<
2
>
(
params
);
std
::
vector
<
std
::
size_t
>
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
1
,
2
,
5
,
5
};
std
::
vector
<
float
>
ref_data
{
210.
,
210.
,
327.
,
327.
,
351.
,
351.
,
375.
,
375.
,
399.
,
399.
,
459.
,
459.
,
706.5
,
706.5
,
742.5
,
742.5
,
778.5
,
778.5
,
814.5
,
814.5
,
747.
,
747.
,
1138.5
,
1138.5
,
1174.5
,
1174.5
,
1210.5
,
1210.5
,
1246.5
,
1246.5
,
1035.
,
1035.
,
1570.5
,
1570.5
,
1606.5
,
1606.5
,
1642.5
,
1642.5
,
1678.5
,
1678.5
,
1323.
,
1323.
,
2002.5
,
2002.5
,
2038.5
,
2038.5
,
2074.5
,
2074.5
,
2110.5
,
2110.5
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect results!"
);
return
res
;
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect results!"
));
}
bool
test_conv1d_nwc
(
)
TEST
(
ReferenceConvolutionFWD
,
Conv1DNWC
)
{
bool
res
{
true
};
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
1
;
params
.
N
=
1
;
params
.
K
=
1
;
params
.
C
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
6
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
};
params
.
num_dim_spatial
_
=
1
;
params
.
N
_
=
1
;
params
.
K
_
=
1
;
params
.
C
_
=
2
;
params
.
filter_spatial_lengths
_
=
std
::
vector
<
ck
::
index_t
>
{
3
};
params
.
input_spatial_lengths
_
=
std
::
vector
<
ck
::
index_t
>
{
6
};
params
.
conv_filter_strides
_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
conv_filter_dilations
_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_left_pads
_
=
std
::
vector
<
ck
::
index_t
>
{
0
};
params
.
input_right_pads
_
=
std
::
vector
<
ck
::
index_t
>
{
0
};
auto
out_tensor
=
run_reference_convolution_forward
<
1
,
...
...
@@ -174,46 +174,53 @@ bool test_conv1d_nwc()
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
);
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
4
};
std
::
vector
<
float
>
ref_data
{
7.5
,
13.5
,
19.5
,
25.5
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimension
s!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect results!"
);
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect result
s!"
)
)
;
}
params
.
num_dim_spatial
=
1
;
params
.
N
=
1
;
params
.
K
=
2
;
params
.
C
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
12
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
2
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
2
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
TEST
(
ReferenceConvolutionFWD
,
Conv1DNWCStridesDilationsPadding
)
{
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial_
=
1
;
params
.
N_
=
1
;
params
.
K_
=
2
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
};
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
12
};
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
2
};
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
2
};
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
out_tensor
=
run_reference_convolution_forward
<
1
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
);
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
1
,
2
,
5
};
ref_data
=
std
::
vector
<
float
>
{
9.
,
9.
,
19.5
,
19.5
,
31.5
,
31.5
,
43.5
,
43.5
,
55.5
,
55.5
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect results!"
);
auto
out_tensor
=
run_reference_convolution_forward
<
1
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
);
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
2
,
5
};
std
::
vector
<
float
>
ref_data
{
9.
,
9.
,
19.5
,
19.5
,
31.5
,
31.5
,
43.5
,
43.5
,
55.5
,
55.5
};
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error: incorrect results!"
));
}
params
.
num_dim_spatial
=
1
;
params
.
N
=
2
;
params
.
K
=
16
;
params
.
C
=
4
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
16
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
TEST
(
ReferenceConvolutionFWD
,
Conv1DNWCSameOutputSize
)
{
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial_
=
1
;
params
.
N_
=
2
;
params
.
K_
=
16
;
params
.
C_
=
4
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
};
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
16
};
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
1
};
auto
out_tensor2
=
run_reference_convolution_forward
<
1
,
float
,
...
...
@@ -224,8 +231,8 @@ bool test_conv1d_nwc()
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
2
,
16
,
16
};
ref_data
=
std
::
vector
<
float
>
{
std
::
vector
<
std
::
size_t
>
ref_dims
{
2
,
16
,
16
};
std
::
vector
<
float
>
ref_data
{
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
1.4
,
3.3
,
3.3
,
3.3
,
3.3
,
3.3
,
3.3
,
3.3
,
3.3
,
...
...
@@ -290,28 +297,24 @@ bool test_conv1d_nwc()
72.9
,
72.9
,
72.9
,
72.9
,
72.9
,
72.9
,
72.9
,
72.9
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
,
49.4
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor2
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor2
.
mData
,
ref_data
,
"Error: incorrect results!"
);
return
res
;
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor2
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor2
.
mData
,
ref_data
,
"Error: incorrect results!"
));
}
bool
test_conv3d_ncdhw
(
)
TEST
(
ReferenceConvolutionFWD
,
Conv3DNCDHW
)
{
bool
res
{
true
};
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
3
;
params
.
N
=
1
;
params
.
K
=
1
;
params
.
C
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
6
,
6
,
6
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
params
.
num_dim_spatial
_
=
3
;
params
.
N
_
=
1
;
params
.
K
_
=
1
;
params
.
C
_
=
2
;
params
.
filter_spatial_lengths
_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
};
params
.
input_spatial_lengths
_
=
std
::
vector
<
ck
::
index_t
>
{
6
,
6
,
6
};
params
.
conv_filter_strides
_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
conv_filter_dilations
_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_left_pads
_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
params
.
input_right_pads
_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
auto
out_tensor
=
run_reference_convolution_forward
<
3
,
float
,
...
...
@@ -331,32 +334,37 @@ bool test_conv3d_ncdhw()
634.5
,
637.2
,
639.9
,
642.60004
,
650.7
,
653.4
,
656.10004
,
658.8
,
699.3
,
702.
,
704.7
,
707.4
,
715.5
,
718.2
,
720.9
,
723.60004
,
731.7
,
734.4001
,
737.10004
,
739.8
,
747.9001
,
750.60004
,
753.3
,
756.
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error [case 1]: wrong output tensor dimensions!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 1]: incorrect results!"
);
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error [case 1]: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 1]: incorrect results!"
));
}
params
.
N
=
1
;
params
.
K
=
2
;
params
.
C
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
12
,
12
,
12
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
};
params
.
conv_filter_dilations
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
TEST
(
ReferenceConvolutionFWD
,
Conv3DNCDHWStridesDilations
)
{
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial_
=
3
;
params
.
N_
=
1
;
params
.
K_
=
2
;
params
.
C_
=
2
;
params
.
filter_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
};
params
.
input_spatial_lengths_
=
std
::
vector
<
ck
::
index_t
>
{
12
,
12
,
12
};
params
.
conv_filter_strides_
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
3
};
params
.
conv_filter_dilations_
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_left_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
params
.
input_right_pads_
=
std
::
vector
<
ck
::
index_t
>
{
0
,
0
,
0
};
out_tensor
=
run_reference_convolution_forward
<
3
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NCDHW
,
ck
::
tensor_layout
::
convolution
::
KCZYX
,
ck
::
tensor_layout
::
convolution
::
NKDHW
>
(
auto
out_tensor
=
run_reference_convolution_forward
<
3
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NCDHW
,
ck
::
tensor_layout
::
convolution
::
KCZYX
,
ck
::
tensor_layout
::
convolution
::
NKDHW
>
(
params
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
1
,
2
,
4
,
4
,
4
};
ref_data
=
std
::
vector
<
float
>
{
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
2
,
4
,
4
,
4
};
std
::
vector
<
float
>
ref_data
{
2756.7002
,
2764.7998
,
2772.9001
,
2781.
,
2853.9001
,
2862.
,
2870.1
,
2878.2002
,
2951.1
,
2959.2002
,
2967.2998
,
2975.4001
,
3048.2998
,
3056.4001
,
3064.5
,
3072.6
,
3923.1
,
3931.2
,
3939.2998
,
3947.4
,
4020.2998
,
4028.4001
,
4036.5002
,
4044.5999
,
...
...
@@ -373,26 +381,9 @@ bool test_conv3d_ncdhw()
5283.9004
,
5292.
,
5300.0996
,
5308.2
,
5381.0996
,
5389.2
,
5397.3
,
5405.4004
,
6255.9004
,
6264.0005
,
6272.1
,
6280.2
,
6353.1
,
6361.2
,
6369.301
,
6377.4
,
6450.301
,
6458.4
,
6466.5
,
6474.6
,
6547.5
,
6555.6
,
6563.699
,
6571.801
};
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error [case 2]: wrong output tensor dimensions!"
);
res
=
res
&&
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 2]: incorrect results!"
,
1e-4
f
,
1e-6
f
);
return
res
;
}
}
// anonymous namespace
int
main
(
void
)
{
bool
res
{
true
};
res
=
test_conv2d_nhwc
();
std
::
cout
<<
"test_conv2d_nhwc ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
res
=
test_conv1d_nwc
();
std
::
cout
<<
"TestConv1DNHWC ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
res
=
test_conv3d_ncdhw
();
std
::
cout
<<
"test_conv3d_ncdhw ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
return
res
?
0
:
1
;
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mDesc
.
GetLengths
(),
ref_dims
,
"Error [case 2]: wrong output tensor dimensions!"
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_tensor
.
mData
,
ref_data
,
"Error [case 2]: incorrect results!"
,
1e-4
f
,
1e-6
f
));
}
Prev
1
…
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment