Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0a35a4e1
Commit
0a35a4e1
authored
Nov 13, 2019
by
Chao Liu
Browse files
refactor host code
parent
a34977df
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
118 additions
and
101 deletions
+118
-101
composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
...gorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
+3
-2
composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp
...l/include/tensor_description/ConstantMatrixDescriptor.hpp
+1
-1
composable_kernel/include/tensor_description/tensor_coordinate.hpp
...e_kernel/include/tensor_description/tensor_coordinate.hpp
+2
-2
composable_kernel/include/tensor_description/tensor_coordinate_deprecated.hpp
...clude/tensor_description/tensor_coordinate_deprecated.hpp
+2
-2
composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp
...l/include/tensor_description/tensor_descriptor_helper.hpp
+6
-6
driver/include/conv_common.hpp
driver/include/conv_common.hpp
+29
-14
driver/include/device_tensor.hpp
driver/include/device_tensor.hpp
+28
-0
driver/include/host_conv.hpp
driver/include/host_conv.hpp
+0
-66
driver/include/tensor.hpp
driver/include/tensor.hpp
+41
-4
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+2
-2
driver/src/tensor.cpp
driver/src/tensor.cpp
+4
-2
No files found.
composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp
View file @
0a35a4e1
...
...
@@ -93,8 +93,9 @@ struct GridwiseConvolutionDirect_v2_nchw_kcyx_nkhw
constexpr
auto
wei_kcyx_thread_block_desc
=
make_ConstantTensorDescriptor
(
Sequence
<
KPerThread
,
CPerThread
,
Y
,
X
>
{},
wei_kcyx_block_desc
.
GetStrides
());
constexpr
auto
out_nkhw_thread_desc
=
get_convolution_output_default_4d_tensor_descriptor
(
in_nchw_thread_block_desc
,
wei_kcyx_thread_block_desc
);
constexpr
auto
out_nkhw_thread_desc
=
get_convolution_output_default_4d_tensor_descriptor_deprecated
(
in_nchw_thread_block_desc
,
wei_kcyx_thread_block_desc
);
// register
Float
p_out_thread
[
out_nkhw_thread_desc
.
GetElementSpace
()];
...
...
composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp
View file @
0a35a4e1
...
...
@@ -60,7 +60,7 @@ __host__ __device__ constexpr auto
template
<
typename
...
Ts
>
__host__
__device__
constexpr
auto
make_ConstantMatrixDescriptor
(
ConstantTensorDescriptor_deprecated
<
Ts
...
>
)
make_ConstantMatrixDescriptor
(
ConstantTensorDescriptor_deprecated
<
Ts
...
>
)
{
using
TDesc
=
ConstantTensorDescriptor_deprecated
<
Ts
...
>
;
static_assert
(
TDesc
::
GetNumOfDimension
()
==
2
,
"wrong"
);
...
...
composable_kernel/include/tensor_description/tensor_coordinate.hpp
View file @
0a35a4e1
...
...
@@ -228,7 +228,7 @@ struct TensorCoordinate
private:
template
<
typename
...
Ts
>
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
NativeTensorDescriptor
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
NativeTensorDescriptor
<
Ts
...
>
)
{
return
NativeTensorCoordinate
<
NativeTensorDescriptor
<
Ts
...
>>
(
make_zero_array
<
index_t
,
TensorDesc
::
GetNumOfDimension
()
>
());
...
...
@@ -236,7 +236,7 @@ struct TensorCoordinate
template
<
typename
...
Ts
>
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
TransformedTensorDescriptor
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
TransformedTensorDescriptor
<
Ts
...
>
)
{
return
TransformedTensorCoordinate
<
TransformedTensorDescriptor
<
Ts
...
>>
(
make_zero_array
<
index_t
,
TensorDesc
::
GetNumOfDimension
()
>
());
...
...
composable_kernel/include/tensor_description/tensor_coordinate_deprecated.hpp
View file @
0a35a4e1
...
...
@@ -327,14 +327,14 @@ struct TensorCoordinate_deprecated
private:
template
<
class
...
Ts
>
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
ConstantTensorDescriptor_deprecated
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
ConstantTensorDescriptor_deprecated
<
Ts
...
>
)
{
return
NormalTensorCoordinate_deprecated
<
ConstantTensorDescriptor_deprecated
<
Ts
...
>>
();
}
template
<
class
...
Ts
>
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
ConstantMergedTensorDescriptor_deprecated
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
ConstantMergedTensorDescriptor_deprecated
<
Ts
...
>
)
{
return
MergedTensorCoordinate_deprecated
<
ConstantMergedTensorDescriptor_deprecated
<
Ts
...
>>
();
...
...
composable_kernel/include/tensor_description/tensor_descriptor_helper.hpp
View file @
0a35a4e1
...
...
@@ -64,10 +64,10 @@ template <typename LowerTensorDescriptor,
index_t
...
LowerDimensionIds
,
index_t
...
UpperDimensionIds
>
__host__
__device__
constexpr
auto
reorder_transformed_tensor_descriptor_impl
(
LowerTensorDescriptor
,
Sequence
<
LowerLengths
...
>
,
Sequence
<
LowerDimensionIds
...
>
,
Sequence
<
UpperDimensionIds
...
>
)
reorder_transformed_tensor_descriptor_impl
(
LowerTensorDescriptor
,
Sequence
<
LowerLengths
...
>
,
Sequence
<
LowerDimensionIds
...
>
,
Sequence
<
UpperDimensionIds
...
>
)
{
return
TransformedTensorDescriptor
<
LowerTensorDescriptor
,
Tuple
<
PassThrough
<
LowerLengths
>
...
>
,
...
...
@@ -78,7 +78,7 @@ __host__ __device__ constexpr auto
// reorder a NativeTensorDescriptor
template
<
typename
...
Ts
,
typename
MapLower2Upper
>
__host__
__device__
constexpr
auto
reorder_tensor_descriptor_given_lower2upper
(
NativeTensorDescriptor
<
Ts
...
>
,
MapLower2Upper
)
reorder_tensor_descriptor_given_lower2upper
(
NativeTensorDescriptor
<
Ts
...
>
,
MapLower2Upper
)
{
static_assert
(
is_valid_sequence_map
<
MapLower2Upper
>
{},
"wrong! MapLower2Upper is not a valid map"
);
...
...
@@ -96,7 +96,7 @@ __host__ __device__ constexpr auto
// reorder a TransformedTensorDescriptor
template
<
typename
...
Ts
,
typename
MapLower2Upper
>
__host__
__device__
constexpr
auto
reorder_tensor_descriptor_given_lower2upper
(
TransformedTensorDescriptor
<
Ts
...
>
,
MapLower2Upper
)
reorder_tensor_descriptor_given_lower2upper
(
TransformedTensorDescriptor
<
Ts
...
>
,
MapLower2Upper
)
{
static_assert
(
is_valid_sequence_map
<
MapLower2Upper
>
{},
"wrong! MapLower2Upper is not a valid map"
);
...
...
driver/include/conv_common.hpp
View file @
0a35a4e1
...
...
@@ -2,10 +2,16 @@
#define CONV_COMMON_HPP
#include "ConstantTensorDescriptor_deprecated.hpp"
#include "tensor_descriptor.hpp"
// this is ugly, only for 4d
template
<
class
InDesc
,
class
WeiDesc
>
constexpr
auto
get_convolution_output_default_4d_tensor_descriptor
(
InDesc
,
WeiDesc
)
template
<
class
InDesc
,
class
WeiDesc
,
class
ConvStrides
,
class
ConvDilations
,
class
LowerPads
,
class
UpperPads
>
constexpr
auto
get_convolution_output_default_4d_tensor_descriptor_deprecated
(
InDesc
,
WeiDesc
,
ConvStrides
,
ConvDilations
,
LowerPads
,
UpperPads
)
{
using
namespace
ck
;
...
...
@@ -22,18 +28,27 @@ constexpr auto get_convolution_output_default_4d_tensor_descriptor(InDesc, WeiDe
static_assert
(
in_desc
.
GetLength
(
I1
)
==
wei_desc
.
GetLength
(
I1
),
"input & weight dimension not consistent"
);
constexpr
auto
N
=
in_desc
.
GetLength
(
I0
);
constexpr
auto
HI
=
in_desc
.
GetLength
(
I2
);
constexpr
auto
WI
=
in_desc
.
GetLength
(
I3
);
constexpr
index_t
N
=
in_desc
.
GetLength
(
I0
);
constexpr
index_t
Hi
=
in_desc
.
GetLength
(
I2
);
constexpr
index_t
Wi
=
in_desc
.
GetLength
(
I3
);
constexpr
index_t
K
=
wei_desc
.
GetLength
(
I0
);
constexpr
index_t
Y
=
wei_desc
.
GetLength
(
I2
);
constexpr
index_t
X
=
wei_desc
.
GetLength
(
I3
);
constexpr
index_t
HPadLow
=
LowerPads
{}.
Get
(
I0
);
constexpr
index_t
WPadLow
=
LowerPads
{}.
Get
(
I1
);
constexpr
auto
K
=
wei_desc
.
GetLength
(
I0
);
constexpr
auto
Y
=
wei_desc
.
GetLength
(
I2
);
constexpr
auto
X
=
wei_desc
.
GetLength
(
I3
);
constexpr
index_t
HPadUp
=
UpperPads
{}.
Get
(
I0
);
constexpr
index_t
WPadUp
=
UpperPads
{}.
Get
(
I1
);
constexpr
auto
HO
=
HI
+
1
-
Y
;
constexpr
auto
WO
=
WI
+
1
-
X
;
constexpr
index_t
YEff
=
(
Y
-
1
)
*
ConvDilations
{}[
0
]
+
1
;
constexpr
index_t
XEff
=
(
X
-
1
)
*
ConvDilations
{}[
1
]
+
1
;
constexpr
index_t
Ho
=
(
Hi
+
HPadLow
+
HPadUp
-
YEff
)
/
ConvStrides
{}[
0
]
+
1
;
constexpr
index_t
Wo
=
(
Wi
+
WPadLow
+
WPadUp
-
XEff
)
/
ConvStrides
{}[
1
]
+
1
;
return
make_ConstantTensorDescriptor_packed
(
Sequence
<
N
,
K
,
H
O
,
W
O
>
{});
return
make_ConstantTensorDescriptor_packed
(
Sequence
<
N
,
K
,
H
o
,
W
o
>
{});
}
template
<
class
InDesc
,
...
...
@@ -42,7 +57,7 @@ template <class InDesc,
class
ConvDilations
,
class
LowerPads
,
class
UpperPads
>
constexpr
auto
get_convolution_
with_padding_
output_default_4d_tensor_descriptor
(
constexpr
auto
get_convolution_output_default_4d_tensor_descriptor
(
InDesc
,
WeiDesc
,
ConvStrides
,
ConvDilations
,
LowerPads
,
UpperPads
)
{
using
namespace
ck
;
...
...
@@ -80,7 +95,7 @@ constexpr auto get_convolution_with_padding_output_default_4d_tensor_descriptor(
constexpr
index_t
Ho
=
(
Hi
+
HPadLow
+
HPadUp
-
YEff
)
/
ConvStrides
{}[
0
]
+
1
;
constexpr
index_t
Wo
=
(
Wi
+
WPadLow
+
WPadUp
-
XEff
)
/
ConvStrides
{}[
1
]
+
1
;
return
make_
ConstantT
ensor
D
escriptor_packed
(
Sequence
<
N
,
K
,
Ho
,
Wo
>
{});
return
make_
native_t
ensor
_d
escriptor_packed
(
Sequence
<
N
,
K
,
Ho
,
Wo
>
{});
}
template
<
class
InDesc
,
class
WeiDesc
,
class
OutDesc
>
...
...
driver/include/device_tensor.hpp
0 → 100644
View file @
0a35a4e1
#pragma once
#include "tensor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor_deprecated.hpp"
#include "tensor_descriptor.hpp"
template
<
typename
ConstTensorDesc
,
std
::
size_t
...
Is
>
auto
make_TensorDescriptor_impl
(
ConstTensorDesc
,
std
::
integer_sequence
<
std
::
size_t
,
Is
...
>
)
{
std
::
initializer_list
<
std
::
size_t
>
lengths
=
{
ConstTensorDesc
::
GetLength
(
Is
)...};
std
::
initializer_list
<
std
::
size_t
>
strides
=
{
ConstTensorDesc
::
GetStride
(
Is
)...};
return
TensorDescriptor
(
lengths
,
strides
);
}
template
<
typename
ConstTensorDesc
>
auto
make_TensorDescriptor
(
ConstTensorDesc
)
{
return
make_TensorDescriptor_impl
(
ConstTensorDesc
{},
std
::
make_integer_sequence
<
std
::
size_t
,
ConstTensorDesc
::
GetNumOfDimension
()
>
{});
}
template
<
typename
ConstTensorDesc
>
void
ostream_ConstantTensorDescriptor
(
ConstTensorDesc
,
std
::
ostream
&
os
=
std
::
cout
)
{
ostream_TensorDescriptor
(
make_TensorDescriptor
(
ConstTensorDesc
{}),
os
);
}
driver/include/host_conv.hpp
View file @
0a35a4e1
#pragma once
#include "tensor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor_deprecated.hpp"
// this is ugly, only for 4d
template
<
class
TConstTensorDesc
>
void
ostream_ConstantTensorDescriptor
(
TConstTensorDesc
,
std
::
ostream
&
os
=
std
::
cout
)
{
using
namespace
ck
;
static_assert
(
TConstTensorDesc
::
nDim
==
4
,
"nDim is not 4"
);
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
desc
=
TConstTensorDesc
{};
os
<<
"Lengths: {"
<<
desc
.
GetLength
(
I0
)
<<
", "
<<
desc
.
GetLength
(
I1
)
<<
", "
<<
desc
.
GetLength
(
I2
)
<<
", "
<<
desc
.
GetLength
(
I3
)
<<
"}, "
<<
"Strides: {"
<<
desc
.
GetStride
(
I0
)
<<
", "
<<
desc
.
GetStride
(
I1
)
<<
", "
<<
desc
.
GetStride
(
I2
)
<<
", "
<<
desc
.
GetStride
(
I3
)
<<
"}"
<<
std
::
endl
;
}
// this is ugly, only for 4d
template
<
class
TConstTensorDesc
>
auto
make_TensorDescriptor
(
TConstTensorDesc
)
{
using
namespace
ck
;
static_assert
(
TConstTensorDesc
::
nDim
==
4
,
"nDim is not 4"
);
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
desc
=
TConstTensorDesc
{};
std
::
initializer_list
<
index_t
>
lengths
=
{
desc
.
GetLength
(
I0
),
desc
.
GetLength
(
I1
),
desc
.
GetLength
(
I2
),
desc
.
GetLength
(
I3
)};
std
::
initializer_list
<
index_t
>
strides
=
{
desc
.
GetStride
(
I0
),
desc
.
GetStride
(
I1
),
desc
.
GetStride
(
I2
),
desc
.
GetStride
(
I3
)};
return
TensorDescriptor
(
lengths
,
strides
);
}
template
<
class
TIn
,
class
TWei
,
...
...
@@ -331,25 +287,3 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
make_ParallelTensorFunctor
(
f_out_hold
,
N
,
K
,
HTile
,
WTile
)(
num_thread
);
make_ParallelTensorFunctor
(
f_out
,
N
,
K
,
HTile
,
WTile
)(
num_thread
);
}
template
<
class
T
>
void
check_error
(
const
Tensor
<
T
>&
ref
,
const
Tensor
<
T
>&
result
)
{
float
error
=
0
;
float
max_diff
=
-
1
;
float
ref_value
=
0
,
result_value
=
0
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
{
error
+=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
if
(
max_diff
<
diff
)
{
max_diff
=
diff
;
ref_value
=
ref
.
mData
[
i
];
result_value
=
result
.
mData
[
i
];
}
}
std
::
cout
<<
"error: "
<<
error
<<
std
::
endl
;
std
::
cout
<<
"max_diff: "
<<
max_diff
<<
", "
<<
ref_value
<<
", "
<<
result_value
<<
std
::
endl
;
}
driver/include/tensor.hpp
View file @
0a35a4e1
...
...
@@ -68,10 +68,12 @@ auto construct_f_unpack_args(F, T args)
struct
TensorDescriptor
{
TensorDescriptor
()
=
delete
;
TensorDescriptor
(
std
::
initializer_list
<
std
::
size_t
>
lens
);
TensorDescriptor
(
std
::
initializer_list
<
std
::
size_t
>
lens
,
std
::
initializer_list
<
std
::
size_t
>
strides
);
TensorDescriptor
(
std
::
vector
<
std
::
size_t
>
lens
,
std
::
vector
<
std
::
size_t
>
strides
);
template
<
typename
X
>
TensorDescriptor
(
std
::
vector
<
X
>
lens
);
template
<
typename
X
,
typename
Y
>
TensorDescriptor
(
std
::
vector
<
X
>
lens
,
std
::
vector
<
Y
>
strides
);
void
CalculateStrides
();
...
...
@@ -269,4 +271,39 @@ struct Tensor
std
::
vector
<
T
>
mData
;
};
void
ostream_TensorDescriptor
(
const
TensorDescriptor
&
desc
,
std
::
ostream
&
os
=
std
::
cout
)
{
os
<<
"dim "
<<
desc
.
GetNumOfDimension
()
<<
", "
;
os
<<
"lengths {"
;
LogRange
(
os
,
desc
.
GetLengths
(),
", "
);
os
<<
"}, "
;
os
<<
"strides {"
;
LogRange
(
os
,
desc
.
GetStrides
(),
", "
);
os
<<
"}"
<<
std
::
endl
;
}
template
<
class
T
>
void
check_error
(
const
Tensor
<
T
>&
ref
,
const
Tensor
<
T
>&
result
)
{
float
error
=
0
;
float
max_diff
=
-
1
;
float
ref_value
=
0
,
result_value
=
0
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
{
error
+=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
if
(
max_diff
<
diff
)
{
max_diff
=
diff
;
ref_value
=
ref
.
mData
[
i
];
result_value
=
result
.
mData
[
i
];
}
}
std
::
cout
<<
"error: "
<<
error
<<
std
::
endl
;
std
::
cout
<<
"max_diff: "
<<
max_diff
<<
", "
<<
ref_value
<<
", "
<<
result_value
<<
std
::
endl
;
}
#endif
driver/src/conv_driver.cpp
View file @
0a35a4e1
...
...
@@ -11,6 +11,7 @@
#include "tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
//#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
//#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"
//#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn_padded.hpp"
...
...
@@ -24,7 +25,6 @@
#include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw_deprecated.hpp"
#include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
int
main
(
int
argc
,
char
*
argv
[])
{
using
namespace
ck
;
...
...
@@ -315,7 +315,7 @@ int main(int argc, char* argv[])
auto
in_nchw_desc
=
make_ConstantTensorDescriptor_packed
(
Sequence
<
N
,
C
,
HI
,
WI
>
{});
auto
wei_kcyx_desc
=
make_ConstantTensorDescriptor_packed
(
Sequence
<
K
,
C
,
Y
,
X
>
{});
auto
out_nkhw_desc
=
get_convolution_
with_padding_
output_default_4d_tensor_descriptor
(
auto
out_nkhw_desc
=
get_convolution_output_default_4d_tensor_descriptor
_deprecated
(
in_nchw_desc
,
wei_kcyx_desc
,
ConvStrides
{},
ConvDilations
{},
LeftPads
{},
RightPads
{});
ostream_ConstantTensorDescriptor
(
in_nchw_desc
,
std
::
cout
<<
"in_nchw_desc: "
);
...
...
driver/src/tensor.cpp
View file @
0a35a4e1
...
...
@@ -3,12 +3,14 @@
#include "tensor.hpp"
TensorDescriptor
::
TensorDescriptor
(
std
::
initializer_list
<
std
::
size_t
>
lens
)
:
mLens
(
lens
)
template
<
typename
X
>
TensorDescriptor
::
TensorDescriptor
(
std
::
vector
<
X
>
lens
)
:
mLens
(
lens
)
{
this
->
CalculateStrides
();
}
TensorDescriptor
::
TensorDescriptor
(
std
::
vector
<
std
::
size_t
>
lens
,
std
::
vector
<
std
::
size_t
>
strides
)
template
<
typename
X
,
typename
Y
>
TensorDescriptor
::
TensorDescriptor
(
std
::
vector
<
X
>
lens
,
std
::
vector
<
Y
>
strides
)
:
mLens
(
lens
),
mStrides
(
strides
)
{
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment