Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
76ee0baf
Commit
76ee0baf
authored
Apr 22, 2022
by
Chao Liu
Browse files
Merge remote-tracking branch 'origin/develop' into improve_pipeline
parents
4816890d
1a0cd5d1
Changes
31
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
199 additions
and
380 deletions
+199
-380
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+3
-2
test/conv2d_bwd_weight/CMakeLists.txt
test/conv2d_bwd_weight/CMakeLists.txt
+1
-2
test/conv_util/CMakeLists.txt
test/conv_util/CMakeLists.txt
+1
-1
test/convnd_bwd_data/CMakeLists.txt
test/convnd_bwd_data/CMakeLists.txt
+1
-2
test/convnd_fwd/CMakeLists.txt
test/convnd_fwd/CMakeLists.txt
+4
-6
test/convnd_fwd/conv1d_fwd.cpp
test/convnd_fwd/conv1d_fwd.cpp
+36
-78
test/convnd_fwd/conv2d_fwd.cpp
test/convnd_fwd/conv2d_fwd.cpp
+31
-75
test/convnd_fwd/conv3d_fwd.cpp
test/convnd_fwd/conv3d_fwd.cpp
+107
-162
test/convnd_fwd/conv_util.hpp
test/convnd_fwd/conv_util.hpp
+8
-16
test/reference_conv_fwd/CMakeLists.txt
test/reference_conv_fwd/CMakeLists.txt
+1
-1
test/reference_conv_fwd/reference_conv_fwd.cpp
test/reference_conv_fwd/reference_conv_fwd.cpp
+6
-35
No files found.
profiler/src/profiler.cpp
View file @
76ee0baf
...
...
@@ -4,6 +4,8 @@
#include <cstdlib>
#include <cstring>
#include "profile_convnd_fwd.hpp"
int
profile_gemm
(
int
,
char
*
[]);
int
profile_gemm_bias_2d
(
int
,
char
*
[]);
int
profile_gemm_bias_relu
(
int
,
char
*
[]);
...
...
@@ -11,7 +13,6 @@ int profile_gemm_bias_relu_add(int, char*[]);
int
profile_gemm_reduce
(
int
,
char
*
[]);
int
profile_batched_gemm
(
int
,
char
*
[]);
int
profile_grouped_gemm
(
int
,
char
*
[]);
int
profile_conv_fwd
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_add
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_atomic_add
(
int
,
char
*
[]);
...
...
@@ -56,7 +57,7 @@ int main(int argc, char* argv[])
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd"
)
==
0
)
{
return
profile_conv_fwd
(
argc
,
argv
);
return
ck
::
profiler
::
profile_conv
nd
_fwd
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu"
)
==
0
)
{
...
...
test/conv2d_bwd_weight/CMakeLists.txt
View file @
76ee0baf
...
...
@@ -4,5 +4,4 @@ include_directories(BEFORE
)
add_test_executable
(
test_conv2d_bwd_weight conv2d_bwd_weight.cpp
)
target_link_libraries
(
test_conv2d_bwd_weight PRIVATE host_tensor
)
target_link_libraries
(
test_conv2d_bwd_weight PRIVATE device_conv2d_bwd_weight_instance
)
target_link_libraries
(
test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_fwd_util
)
test/conv_util/CMakeLists.txt
View file @
76ee0baf
add_test_executable
(
test_conv_util conv_util.cpp
)
target_link_libraries
(
test_conv_util PRIVATE host_tensor
)
target_link_libraries
(
test_conv_util PRIVATE host_tensor
conv_fwd_util
)
test/convnd_bwd_data/CMakeLists.txt
View file @
76ee0baf
...
...
@@ -4,5 +4,4 @@ include_directories(BEFORE
)
add_test_executable
(
test_convnd_bwd_data convnd_bwd_data.cpp
)
target_link_libraries
(
test_convnd_bwd_data PRIVATE host_tensor
)
target_link_libraries
(
test_convnd_bwd_data PRIVATE device_convnd_bwd_data_instance
)
target_link_libraries
(
test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_fwd_util
)
test/convnd_fwd/CMakeLists.txt
View file @
76ee0baf
add_custom_target
(
test_convnd_fwd
)
add_test_executable
(
test_conv1d_fwd conv1d_fwd.cpp
)
target_link_libraries
(
test_conv1d_fwd PRIVATE host_tensor
)
target_link_libraries
(
test_conv1d_fwd PRIVATE
device_conv1d_fwd_instance
)
target_link_libraries
(
test_conv1d_fwd PRIVATE host_tensor
device_conv1d_fwd_instance conv_fwd_util
)
target_link_libraries
(
test_conv1d_fwd PRIVATE
)
add_dependencies
(
test_convnd_fwd test_conv1d_fwd
)
add_test_executable
(
test_conv2d_fwd conv2d_fwd.cpp
)
target_link_libraries
(
test_conv2d_fwd PRIVATE host_tensor
)
target_link_libraries
(
test_conv2d_fwd PRIVATE device_conv2d_fwd_instance
)
target_link_libraries
(
test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance conv_fwd_util
)
add_dependencies
(
test_convnd_fwd test_conv2d_fwd
)
add_test_executable
(
test_conv3d_fwd conv3d_fwd.cpp
)
target_link_libraries
(
test_conv3d_fwd PRIVATE host_tensor
)
target_link_libraries
(
test_conv3d_fwd PRIVATE device_conv3d_fwd_instance
)
target_link_libraries
(
test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_fwd_util
)
add_dependencies
(
test_convnd_fwd test_conv3d_fwd
)
test/convnd_fwd/conv1d_fwd.cpp
View file @
76ee0baf
...
...
@@ -7,37 +7,15 @@
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using
DeviceConvFwdNoOpPtr
=
ck
::
tensor_operation
::
device
::
DeviceConvFwdPtr
<
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
>
;
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
device_conv1d_fwd_instance
{
void
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
}
// namespace device_conv1d_fwd_instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
namespace
{
bool
test_conv1D_nwc
()
{
bool
res
{
true
};
using
namespace
std
::
placeholders
;
using
namespace
ck
::
utils
;
namespace
ctl
=
ck
::
tensor_layout
::
convolution
;
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
1
;
params
.
N
=
2
;
...
...
@@ -50,30 +28,26 @@ bool test_conv1D_nwc()
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
);
const
Tensor
<
float
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
float
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
float
>&
host_output
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
float
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
ck
::
utils
::
conv
::
run_reference_convolution_forward
<
1
>
(
params
,
input
,
weights
,
host_output
);
test
::
conv
::
RunConv
<
1
>
(
params
,
input
,
weights
,
device_output
);
res
=
res
&&
ck
::
utils
::
check_err
(
device_output
.
mData
,
host_output
.
mData
,
"Error: incorrect results!"
,
1e-5
f
,
1e-4
f
);
return
res
;
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
test
::
conv
::
get_test_convolution_fwd_instance
<
1
>
(
conv_ptrs
);
conv
::
ConvFwdOpInstance
<
float
,
float
,
float
,
ctl
::
NWC
,
ctl
::
KCX
,
ctl
::
NWK
>
conv_instance
(
params
);
auto
reference_conv_fwd_fun
=
std
::
bind
(
conv
::
run_reference_convolution_forward
<
1
,
float
,
float
,
float
>
,
params
,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
float
,
float
,
float
>
run_engine
(
conv_instance
,
reference_conv_fwd_fun
);
run_engine
.
SetAtol
(
1e-5
);
run_engine
.
SetRtol
(
1e-4
);
return
run_engine
.
Test
(
conv_ptrs
);
}
template
<
typename
T
>
bool
test_conv1d_nwc_instances
(
const
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
conv_ptrs
)
bool
test_conv1d_nwc_instances
(
const
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>&
conv_ptrs
)
{
using
namespace
std
::
placeholders
;
using
namespace
ck
::
utils
;
namespace
ctl
=
ck
::
tensor_layout
::
convolution
;
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
1
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
};
...
...
@@ -83,52 +57,36 @@ bool test_conv1d_nwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_ptr
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
T
,
T
,
T
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
);
const
Tensor
<
T
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
T
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
T
>&
host_output
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
T
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
ck
::
utils
::
conv
::
run_reference_convolution_forward
<
1
>
(
params
,
input
,
weights
,
host_output
);
return
ck
::
utils
::
conv
::
run_convolution_forward_instances
<
1
>
(
params
,
conv_ptrs
,
input
,
weights
,
device_output
,
host_output
);
conv
::
ConvFwdOpInstance
<
T
,
T
,
T
,
ctl
::
NWC
,
ctl
::
KCX
,
ctl
::
NWK
>
conv_instance
(
params
);
auto
reference_conv_fwd_fun
=
std
::
bind
(
conv
::
run_reference_convolution_forward
<
1
,
T
,
T
,
T
>
,
params
,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
T
,
T
,
T
>
run_engine
(
conv_instance
,
reference_conv_fwd_fun
);
return
run_engine
.
Test
(
conv_ptrs
);
}
bool
test_conv1d_nwc_bf16_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv1d_fwd_instance
::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_bf16_instances
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
ck
::
bhalf_t
>
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
ck
::
bhalf_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>::
Get
<
1
>
());
}
bool
test_conv1d_nwc_f16_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv1d_fwd_instance
::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f16_instances
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
ck
::
half_t
>
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
ck
::
half_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>::
Get
<
1
>
());
}
bool
test_conv1d_nwc_f32_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv1d_fwd_instance
::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_f32_instances
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
float
>
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
float
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
float
,
float
,
float
>::
Get
<
1
>
());
}
bool
test_conv1d_nwc_int8_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv1d_fwd_instance
::
add_device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instances
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
int8_t
>
(
conv_ptrs
);
return
test_conv1d_nwc_instances
<
int8_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
int8_t
,
int8_t
,
int8_t
>::
Get
<
1
>
());
}
}
// anonymous namespace
...
...
@@ -149,7 +107,7 @@ int main()
std
::
cout
<<
"
\n
test_conv1d_nwc_f32_instances ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
res
=
test_conv1d_nwc_int8_instances
();
std
::
cout
<<
"
\n
tes
_
tconv1
_dnw_c
int
_
8instances ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
std
::
cout
<<
"
\n
test
_
conv1
d_nwc_
int8
_
instances ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
return
res
?
0
:
1
;
...
...
test/convnd_fwd/conv2d_fwd.cpp
View file @
76ee0baf
#include <half.hpp>
#include <iostream>
#include <stdexcept>
#include <tuple>
#include <vector>
...
...
@@ -8,38 +7,14 @@
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using
DeviceConvFwdNoOpPtr
=
ck
::
tensor_operation
::
device
::
DeviceConvFwdPtr
<
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
>
;
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
device_conv2d_fwd_instance
{
void
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
}
// namespace device_conv2d_fwd_instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
namespace
{
bool
test_conv2d_nhwc
()
{
bool
res
{
true
};
using
namespace
std
::
placeholders
;
using
namespace
ck
::
utils
;
ck
::
utils
::
conv
::
ConvParams
params
;
params
.
N
=
2
;
params
.
K
=
16
;
...
...
@@ -47,25 +22,25 @@ bool test_conv2d_nhwc()
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
16
,
16
};
params
.
conv_filter_strides
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
(
params
);
const
Tensor
<
float
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
float
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
float
>&
host_output
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
float
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
ck
::
utils
::
conv
::
run_reference_convolution_forward
<
2
>
(
params
,
input
,
weights
,
host_output
);
test
::
conv
::
RunConv
<
2
>
(
params
,
input
,
weights
,
device_output
);
res
=
res
&&
ck
::
utils
::
check_err
(
device_output
.
mData
,
host_output
.
mData
,
"Error: incorrect results!"
,
1e-5
f
,
1e-4
f
);
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
test
::
conv
::
get_test_convolution_fwd_instance
<
2
>
(
conv_ptrs
);
conv
::
ConvFwdOpInstance
<
float
,
float
,
float
>
conv_instance
(
params
);
return
res
;
auto
reference_conv_fwd_fun
=
std
::
bind
(
conv
::
run_reference_convolution_forward
<
2
,
float
,
float
,
float
>
,
params
,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
float
,
float
,
float
>
run_engine
(
conv_instance
,
reference_conv_fwd_fun
);
run_engine
.
SetAtol
(
1e-5
);
run_engine
.
SetRtol
(
1e-4
);
return
run_engine
.
Test
(
conv_ptrs
);
}
template
<
typename
T
>
bool
test_conv2d_nhwc_instances
(
const
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
conv_ptrs
)
bool
test_conv2d_nhwc_instances
(
const
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>&
conv_ptrs
)
{
ck
::
utils
::
conv
::
ConvParams
params
;
using
namespace
std
::
placeholders
;
using
namespace
ck
::
utils
;
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
2
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
};
params
.
input_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
71
,
71
};
...
...
@@ -74,55 +49,36 @@ bool test_conv2d_nhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_pt
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
T
,
T
,
T
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
(
params
);
const
Tensor
<
T
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
T
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
T
>&
host_output
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
T
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
ck
::
utils
::
conv
::
run_reference_convolution_forward
<
2
>
(
params
,
input
,
weights
,
host_output
);
return
ck
::
utils
::
conv
::
run_convolution_forward_instances
<
2
>
(
params
,
conv_ptrs
,
input
,
weights
,
device_output
,
host_output
);
conv
::
ConvFwdOpInstance
<
T
,
T
,
T
>
conv_instance
(
params
);
auto
reference_conv_fwd_fun
=
std
::
bind
(
conv
::
run_reference_convolution_forward
<
2
,
T
,
T
,
T
>
,
params
,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
T
,
T
,
T
>
run_engine
(
conv_instance
,
reference_conv_fwd_fun
);
return
run_engine
.
Test
(
conv_ptrs
);
}
bool
test_conv2d_nhwc_bf16_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
ck
::
bhalf_t
>
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
ck
::
bhalf_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>::
Get
<
2
>
());
}
bool
test_conv2d_nhwc_f16_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances
(
conv_ptrs
);
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
ck
::
half_t
>
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
ck
::
half_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>::
Get
<
2
>
());
}
bool
test_conv2d_nhwc_f32_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
float
>
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
float
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
float
,
float
,
float
>::
Get
<
2
>
());
}
bool
test_conv2d_nhwc_int8_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
int8_t
>
(
conv_ptrs
);
return
test_conv2d_nhwc_instances
<
int8_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
int8_t
,
int8_t
,
int8_t
>::
Get
<
2
>
());
}
}
// anonymous namespace
...
...
test/convnd_fwd/conv3d_fwd.cpp
View file @
76ee0baf
...
...
@@ -8,37 +8,16 @@
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "host_tensor.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
// Forward declarations for conv instances.
using
DeviceConvFwdNoOpPtr
=
ck
::
tensor_operation
::
device
::
DeviceConvFwdPtr
<
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
>
;
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
device_conv3d_fwd_instance
{
void
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
}
// namespace device_conv3d_fwd_instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
namespace
{
bool
test_conv3d_ndhwc
()
{
bool
res
{
true
};
ck
::
utils
::
conv
::
ConvParams
params
;
using
namespace
std
::
placeholders
;
using
namespace
ck
::
utils
;
namespace
ctl
=
ck
::
tensor_layout
::
convolution
;
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
3
;
params
.
N
=
2
;
params
.
K
=
16
;
...
...
@@ -50,31 +29,26 @@ bool test_conv3d_ndhwc()
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>
(
params
);
const
Tensor
<
float
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
float
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
float
>&
host_output
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
float
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
ck
::
utils
::
conv
::
run_reference_convolution_forward
<
3
>
(
params
,
input
,
weights
,
host_output
);
test
::
conv
::
RunConv
<
3
>
(
params
,
input
,
weights
,
device_output
);
res
=
res
&&
ck
::
utils
::
check_err
(
device_output
.
mData
,
host_output
.
mData
,
"Error: incorrect results!"
,
1e-5
f
,
1e-4
f
);
return
res
;
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
test
::
conv
::
get_test_convolution_fwd_instance
<
3
>
(
conv_ptrs
);
conv
::
ConvFwdOpInstance
<
float
,
float
,
float
,
ctl
::
NDHWC
,
ctl
::
KZYXC
,
ctl
::
NDHWK
>
conv_instance
(
params
);
auto
reference_conv_fwd_fun
=
std
::
bind
(
conv
::
run_reference_convolution_forward
<
3
,
float
,
float
,
float
>
,
params
,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
float
,
float
,
float
>
run_engine
(
conv_instance
,
reference_conv_fwd_fun
);
run_engine
.
SetAtol
(
1e-5
);
run_engine
.
SetRtol
(
1e-4
);
return
run_engine
.
Test
(
conv_ptrs
);
}
bool
test_conv3d_ndhwc_2gb_input
()
{
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
namespace
ck
::
utils
;
// >2GB Input
ck
::
utils
::
conv
::
ConvParams
params
;
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
3
;
params
.
N
=
2
;
params
.
K
=
16
;
...
...
@@ -86,39 +60,35 @@ bool test_conv3d_ndhwc_2gb_input()
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>
(
params
,
false
);
const
Tensor
<
float
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
float
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
float
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
try
{
test
::
conv
::
RunConv
<
3
>
(
params
,
input
,
weights
,
device_output
);
}
catch
(
const
std
::
runtime_error
&
err
)
{
std
::
string
err_msg
{
"Error! device_conv with the specified compilation parameters does "
"not support this Conv problem"
};
if
(
err
.
what
()
!=
err_msg
)
{
return
false
;
}
return
true
;
}
std
::
cout
<<
"Error: Failure checking oversized tensor!"
<<
std
::
endl
;
return
false
;
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
test
::
conv
::
get_test_convolution_fwd_instance
<
3
>
(
conv_ptrs
);
auto
arg
=
conv_ptrs
.
back
()
->
MakeArgumentPointer
(
nullptr
,
nullptr
,
nullptr
,
params
.
N
,
params
.
K
,
params
.
C
,
params
.
input_spatial_lengths
,
params
.
filter_spatial_lengths
,
params
.
GetOutputSpatialLengths
(),
params
.
conv_filter_strides
,
params
.
conv_filter_dilations
,
params
.
input_left_pads
,
params
.
input_right_pads
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
return
!
(
conv_ptrs
.
back
()
->
IsSupportedArgument
(
arg
.
get
()));
}
bool
test_conv3d_ndhwc_2gb_filters
()
{
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
namespace
ck
::
utils
;
// >2GB Filters
ck
::
utils
::
conv
::
ConvParams
params
;
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
3
;
params
.
N
=
2
;
params
.
K
=
16
;
...
...
@@ -130,39 +100,35 @@ bool test_conv3d_ndhwc_2gb_filters()
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>
(
params
,
false
);
const
Tensor
<
float
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
float
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
float
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
try
{
test
::
conv
::
RunConv
<
3
>
(
params
,
input
,
weights
,
device_output
);
}
catch
(
const
std
::
runtime_error
&
err
)
{
std
::
string
err_msg
{
"Error! device_conv with the specified compilation parameters does "
"not support this Conv problem"
};
if
(
err
.
what
()
!=
err_msg
)
{
return
false
;
}
return
true
;
}
std
::
cout
<<
"Error: Failure checking oversized tensor!"
<<
std
::
endl
;
return
false
;
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
test
::
conv
::
get_test_convolution_fwd_instance
<
3
>
(
conv_ptrs
);
auto
arg
=
conv_ptrs
.
back
()
->
MakeArgumentPointer
(
nullptr
,
nullptr
,
nullptr
,
params
.
N
,
params
.
K
,
params
.
C
,
params
.
input_spatial_lengths
,
params
.
filter_spatial_lengths
,
params
.
GetOutputSpatialLengths
(),
params
.
conv_filter_strides
,
params
.
conv_filter_dilations
,
params
.
input_left_pads
,
params
.
input_right_pads
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
return
!
(
conv_ptrs
.
back
()
->
IsSupportedArgument
(
arg
.
get
()));
}
bool
test_conv3d_ndhwc_2gb_output
()
{
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
namespace
ck
::
utils
;
// >2GB Output
ck
::
utils
::
conv
::
ConvParams
params
;
conv
::
ConvParams
params
;
params
.
num_dim_spatial
=
3
;
params
.
N
=
2
;
params
.
K
=
16
;
...
...
@@ -174,39 +140,35 @@ bool test_conv3d_ndhwc_2gb_output()
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
,
2
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
2
,
2
,
2
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>
(
params
,
false
);
const
Tensor
<
float
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
float
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
float
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
try
{
test
::
conv
::
RunConv
<
3
>
(
params
,
input
,
weights
,
device_output
);
}
catch
(
const
std
::
runtime_error
&
err
)
{
std
::
string
err_msg
{
"Error! device_conv with the specified compilation parameters does "
"not support this Conv problem"
};
if
(
err
.
what
()
!=
err_msg
)
{
return
false
;
}
return
true
;
}
std
::
cout
<<
"Error: Failure checking oversized tensor!"
<<
std
::
endl
;
return
false
;
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
test
::
conv
::
get_test_convolution_fwd_instance
<
3
>
(
conv_ptrs
);
auto
arg
=
conv_ptrs
.
back
()
->
MakeArgumentPointer
(
nullptr
,
nullptr
,
nullptr
,
params
.
N
,
params
.
K
,
params
.
C
,
params
.
input_spatial_lengths
,
params
.
filter_spatial_lengths
,
params
.
GetOutputSpatialLengths
(),
params
.
conv_filter_strides
,
params
.
conv_filter_dilations
,
params
.
input_left_pads
,
params
.
input_right_pads
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
return
!
(
conv_ptrs
.
back
()
->
IsSupportedArgument
(
arg
.
get
()));
}
template
<
typename
T
>
bool
test_conv3d_ndhwc_instances
(
const
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
conv_ptrs
)
bool
test_conv3d_ndhwc_instances
(
const
std
::
vector
<
test
::
conv
::
DeviceConvFwdNoOpPtr
>&
conv_ptrs
)
{
ck
::
utils
::
conv
::
ConvParams
params
;
using
namespace
std
::
placeholders
;
using
namespace
ck
::
utils
;
namespace
ctl
=
ck
::
tensor_layout
::
convolution
;
conv
::
ConvParams
params
;
params
.
N
=
64
;
params
.
num_dim_spatial
=
3
;
params
.
filter_spatial_lengths
=
std
::
vector
<
ck
::
index_t
>
{
3
,
3
,
2
};
...
...
@@ -216,53 +178,36 @@ bool test_conv3d_ndhwc_instances(const std::vector<DeviceConvFwdNoOpPtr>& conv_p
params
.
input_left_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
params
.
input_right_pads
=
std
::
vector
<
ck
::
index_t
>
{
1
,
1
,
1
};
auto
host_tensors
=
ck
::
utils
::
conv
::
get_host_tensors
<
T
,
T
,
T
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>
(
params
);
const
Tensor
<
T
>&
input
=
std
::
get
<
0
>
(
host_tensors
);
const
Tensor
<
T
>&
weights
=
std
::
get
<
1
>
(
host_tensors
);
Tensor
<
T
>&
host_output
=
std
::
get
<
2
>
(
host_tensors
);
Tensor
<
T
>&
device_output
=
std
::
get
<
3
>
(
host_tensors
);
conv
::
ConvFwdOpInstance
<
T
,
T
,
T
,
ctl
::
NDHWC
,
ctl
::
KZYXC
,
ctl
::
NDHWK
>
conv_instance
(
params
);
ck
::
utils
::
conv
::
run_reference_convolution_forward
<
3
>
(
params
,
input
,
weights
,
host_output
);
return
ck
::
utils
::
conv
::
run_convolution_forward_instances
<
3
>
(
params
,
conv_ptrs
,
input
,
weights
,
device_output
,
host_output
);
auto
reference_conv_fwd_fun
=
std
::
bind
(
conv
::
run_reference_convolution_forward
<
3
,
T
,
T
,
T
>
,
params
,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
T
,
T
,
T
>
run_engine
(
conv_instance
,
reference_conv_fwd_fun
);
return
run_engine
.
Test
(
conv_ptrs
);
}
bool
test_conv3d_ndhwc_bf16_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv3d_fwd_instance
::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instances
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
ck
::
bhalf_t
>
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
ck
::
bhalf_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>::
Get
<
3
>
());
}
bool
test_conv3d_ndhwc_f16_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv3d_fwd_instance
::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f16_instances
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
ck
::
half_t
>
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
ck
::
half_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>::
Get
<
3
>
());
}
bool
test_conv3d_ndhwc_f32_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv3d_fwd_instance
::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_f32_instances
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
float
>
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
float
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
float
,
float
,
float
>::
Get
<
3
>
());
}
bool
test_conv3d_ndhwc_int8_instances
()
{
std
::
vector
<
DeviceConvFwdNoOpPtr
>
conv_ptrs
;
ck
::
tensor_operation
::
device
::
device_conv3d_fwd_instance
::
add_device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instances
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
int8_t
>
(
conv_ptrs
);
return
test_conv3d_ndhwc_instances
<
int8_t
>
(
ck
::
utils
::
conv
::
ConvolutionFwdInstances
<
int8_t
,
int8_t
,
int8_t
>::
Get
<
3
>
());
}
}
// anonymous namespace
...
...
@@ -293,7 +238,7 @@ int main()
std
::
cout
<<
"
\n
test_conv3d_ndhwc_f32_instances ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
res
=
test_conv3d_ndhwc_int8_instances
();
std
::
cout
<<
"
\n
test_conv3d_ndhw
_
cint
_
8instances ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
std
::
cout
<<
"
\n
test_conv3d_ndhwc
_
int8
_
instances ..... "
<<
(
res
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
return
res
?
0
:
1
;
...
...
test/convnd_fwd/conv_util.hpp
View file @
76ee0baf
...
...
@@ -10,7 +10,8 @@
#include "host_tensor.hpp"
#include "sequence.hpp"
namespace
{
namespace
test
{
namespace
conv
{
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
@@ -19,6 +20,9 @@ using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
DeviceConvFwdNoOpPtr
=
ck
::
tensor_operation
::
device
::
DeviceConvFwdPtr
<
InElementOp
,
WeiElementOp
,
OutElementOp
>
;
static
constexpr
auto
ConvFwdDefault
=
ck
::
tensor_operation
::
device
::
ConvolutionForwardSpecialization
::
Default
;
...
...
@@ -62,26 +66,14 @@ using DeviceConvNDFwdInstance = ck::tensor_operation::device::
1
>
;
// CThreadTransferDstScalarPerVector
// clang-format on
}
// namespace
namespace
test
{
namespace
conv
{
template
<
ck
::
index_t
NDim
,
typename
InDataType
=
float
,
typename
WeiDataType
=
float
,
typename
OutDataType
=
float
>
void
RunConv
(
const
ck
::
utils
::
conv
::
ConvParams
&
params
,
const
Tensor
<
InDataType
>&
input
,
const
Tensor
<
WeiDataType
>&
weights
,
Tensor
<
OutDataType
>&
output
)
void
get_test_convolution_fwd_instance
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
instances
)
{
ck
::
utils
::
conv
::
run_convolution_forward
<
NDim
,
InDataType
,
WeiDataType
,
OutDataType
,
DeviceConvNDFwdInstance
>
(
params
,
input
,
weights
,
output
);
using
ConvInstanceT
=
DeviceConvNDFwdInstance
<
NDim
,
InDataType
,
WeiDataType
,
OutDataType
>
;
instances
.
emplace_back
(
std
::
make_unique
<
ConvInstanceT
>
());
}
}
// namespace conv
...
...
test/reference_conv_fwd/CMakeLists.txt
View file @
76ee0baf
# Reference (host-side) convolution forward test.
add_test_executable(test_reference_conv_fwd reference_conv_fwd.cpp)
# conv_fwd_util supplies ck::utils::conv helpers (ConvParams, fill functors);
# the pre-commit line linking only host_tensor is superseded by this one.
target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_fwd_util)
test/reference_conv_fwd/reference_conv_fwd.cpp
View file @
76ee0baf
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <half.hpp>
...
...
@@ -10,6 +9,7 @@
#include "config.hpp"
#include "conv_fwd_util.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "host_tensor.hpp"
#include "reference_conv_fwd.hpp"
#include "tensor_layout.hpp"
...
...
@@ -19,35 +19,6 @@ using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
// Functor that fills a range with an arithmetic progression:
// m_init_value, m_init_value + m_step, m_init_value + 2 * m_step, ...
template <typename T>
struct FillMonotonicSeq
{
    T m_init_value{0}; // first value written to the range
    T m_step{1};       // increment between consecutive elements

    // Writes the progression into [first, last).
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        T current = m_init_value;
        for(; first != last; ++first)
        {
            *first = current;
            current += m_step;
        }
    }
};
// Functor that overwrites every element of a range with a fixed value.
template <typename T>
struct FillConstant
{
    T m_value{0}; // value copied into each element

    // Assigns m_value to every element of [first, last).
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        while(first != last)
        {
            *first = m_value;
            ++first;
        }
    }
};
template
<
ck
::
index_t
NDim
,
typename
InDataType
=
float
,
typename
WeiDataType
=
float
,
...
...
@@ -55,8 +26,8 @@ template <ck::index_t NDim,
typename
InLayout
=
ck
::
tensor_layout
::
convolution
::
NHWC
,
typename
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
KYXC
,
typename
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NHWK
,
typename
FillInputOp
=
FillMonotonicSeq
<
InDataType
>,
typename
FillWeightsOp
=
FillConstant
<
WeiDataType
>>
typename
FillInputOp
=
ck
::
utils
::
FillMonotonicSeq
<
InDataType
>,
typename
FillWeightsOp
=
ck
::
utils
::
FillConstant
<
WeiDataType
>>
Tensor
<
OutDataType
>
run_reference_convolution_forward
(
const
ck
::
utils
::
conv
::
ConvParams
&
params
,
const
FillInputOp
&
fill_input_op
=
FillInputOp
{},
...
...
@@ -251,7 +222,7 @@ bool test_conv1d_nwc()
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
>
(
params
,
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
params
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
2
,
16
,
16
};
ref_data
=
std
::
vector
<
float
>
{
...
...
@@ -349,7 +320,7 @@ bool test_conv3d_ncdhw()
ck
::
tensor_layout
::
convolution
::
NCDHW
,
ck
::
tensor_layout
::
convolution
::
KCZYX
,
ck
::
tensor_layout
::
convolution
::
NKDHW
>
(
params
,
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
params
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
std
::
vector
<
std
::
size_t
>
ref_dims
{
1
,
1
,
4
,
4
,
4
};
std
::
vector
<
float
>
ref_data
{
407.7
,
410.40002
,
413.09998
,
415.80002
,
423.90002
,
426.6
,
429.30002
,
432.
,
...
...
@@ -383,7 +354,7 @@ bool test_conv3d_ncdhw()
ck
::
tensor_layout
::
convolution
::
NCDHW
,
ck
::
tensor_layout
::
convolution
::
KCZYX
,
ck
::
tensor_layout
::
convolution
::
NKDHW
>
(
params
,
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
params
,
ck
::
utils
::
FillMonotonicSeq
<
float
>
{
0.
f
,
0.1
f
});
ref_dims
=
std
::
vector
<
std
::
size_t
>
{
1
,
2
,
4
,
4
,
4
};
ref_data
=
std
::
vector
<
float
>
{
2756.7002
,
2764.7998
,
2772.9001
,
2781.
,
2853.9001
,
2862.
,
2870.1
,
2878.2002
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment