Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
56fc0842
"...git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "32ab994d76353f7a34ae772984a5f9ee97da6b7e"
Commit
56fc0842
authored
Aug 09, 2021
by
Chao Liu
Browse files
tidy
parent
54fba515
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
58 additions
and
52 deletions
+58
-52
host/host_tensor/include/host_conv.hpp
host/host_tensor/include/host_conv.hpp
+15
-17
host/host_tensor/include/host_tensor_generator.hpp
host/host_tensor/include/host_tensor_generator.hpp
+1
-1
host/solver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp
...lver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp
+42
-34
No files found.
host/host_tensor/include/host_conv.hpp
View file @
56fc0842
...
@@ -14,15 +14,13 @@ void host_direct_convolution(const Tensor<TIn>& in,
...
@@ -14,15 +14,13 @@ void host_direct_convolution(const Tensor<TIn>& in,
const
ConvStrides
&
conv_strides
,
const
ConvStrides
&
conv_strides
,
const
ConvDilations
&
conv_dilations
,
const
ConvDilations
&
conv_dilations
,
const
InLeftPads
&
in_left_pads
,
const
InLeftPads
&
in_left_pads
,
const
InRightPads
&
in_right_pads
,
const
InRightPads
&
,
const
ConvTensorLayout
layout
=
ConvTensorLayout
::
NCHW
)
const
ConvTensorLayout
layout
=
ConvTensorLayout
::
NCHW
)
{
{
using
namespace
ck
;
using
namespace
ck
;
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
double
v
=
0
;
double
v
=
0
;
...
@@ -68,23 +66,25 @@ void host_direct_convolution(const Tensor<TIn>& in,
...
@@ -68,23 +66,25 @@ void host_direct_convolution(const Tensor<TIn>& in,
out
(
n
,
ho
,
wo
,
k
)
=
v
;
out
(
n
,
ho
,
wo
,
k
)
=
v
;
};
};
switch
(
layout
)
if
(
layout
==
ConvTensorLayout
::
NCHW
)
{
{
case
ConvTensorLayout
::
NCHW
:
make_ParallelTensorFunctor
(
f_nchw
,
make_ParallelTensorFunctor
(
f_nchw
,
out
.
mDesc
.
GetLengths
()[
0
],
out
.
mDesc
.
GetLengths
()[
0
],
out
.
mDesc
.
GetLengths
()[
1
],
out
.
mDesc
.
GetLengths
()[
1
],
out
.
mDesc
.
GetLengths
()[
2
],
out
.
mDesc
.
GetLengths
()[
2
],
out
.
mDesc
.
GetLengths
()[
3
])(
std
::
thread
::
hardware_concurrency
());
out
.
mDesc
.
GetLengths
()[
3
])(
std
::
thread
::
hardware_concurrency
());
break
;
}
case
ConvTensorLayout
::
NHWC
:
else
if
(
layout
==
ConvTensorLayout
::
NHWC
)
{
make_ParallelTensorFunctor
(
f_nhwc
,
make_ParallelTensorFunctor
(
f_nhwc
,
out
.
mDesc
.
GetLengths
()[
0
],
out
.
mDesc
.
GetLengths
()[
0
],
out
.
mDesc
.
GetLengths
()[
1
],
out
.
mDesc
.
GetLengths
()[
1
],
out
.
mDesc
.
GetLengths
()[
2
],
out
.
mDesc
.
GetLengths
()[
2
],
out
.
mDesc
.
GetLengths
()[
3
])(
std
::
thread
::
hardware_concurrency
());
out
.
mDesc
.
GetLengths
()[
3
])(
std
::
thread
::
hardware_concurrency
());
break
;
}
default:
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
else
{
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
}
}
}
}
...
@@ -100,17 +100,15 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
...
@@ -100,17 +100,15 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
constexpr
std
::
size_t
HoPerTile
=
2
;
constexpr
std
::
size_t
HoPerTile
=
2
;
constexpr
std
::
size_t
WoPerTile
=
2
;
constexpr
std
::
size_t
WoPerTile
=
2
;
std
::
size_t
N
=
in_nchw
.
mDesc
.
GetLengths
()[
0
];
std
::
size_t
N
=
in_nchw
.
mDesc
.
GetLengths
()[
0
];
std
::
size_t
C
=
in_nchw
.
mDesc
.
GetLengths
()[
1
];
std
::
size_t
C
=
in_nchw
.
mDesc
.
GetLengths
()[
1
];
std
::
size_t
HI
=
in_nchw
.
mDesc
.
GetLengths
()[
2
];
std
::
size_t
WI
=
in_nchw
.
mDesc
.
GetLengths
()[
3
];
std
::
size_t
K
=
wei_kcyx
.
mDesc
.
GetLengths
()[
0
];
std
::
size_t
K
=
wei_kcyx
.
mDesc
.
GetLengths
()[
0
];
std
::
size_t
Y
=
wei_kcyx
.
mDesc
.
GetLengths
()[
2
];
std
::
size_t
Y
=
wei_kcyx
.
mDesc
.
GetLengths
()[
2
];
std
::
size_t
X
=
wei_kcyx
.
mDesc
.
GetLengths
()[
3
];
std
::
size_t
X
=
wei_kcyx
.
mDesc
.
GetLengths
()[
3
];
std
::
size_t
H
O
=
out_nkhw
.
mDesc
.
GetLengths
()[
2
];
std
::
size_t
H
o
=
out_nkhw
.
mDesc
.
GetLengths
()[
2
];
std
::
size_t
W
O
=
out_nkhw
.
mDesc
.
GetLengths
()[
3
];
std
::
size_t
W
o
=
out_nkhw
.
mDesc
.
GetLengths
()[
3
];
index_t
h_pad_low
=
InLeftPads
{}.
Get
(
Number
<
0
>
{});
index_t
h_pad_low
=
InLeftPads
{}.
Get
(
Number
<
0
>
{});
index_t
w_pad_low
=
InLeftPads
{}.
Get
(
Number
<
1
>
{});
index_t
w_pad_low
=
InLeftPads
{}.
Get
(
Number
<
1
>
{});
...
@@ -118,8 +116,8 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
...
@@ -118,8 +116,8 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
std
::
size_t
HiPerTile
=
HoPerTile
+
Y
-
1
;
std
::
size_t
HiPerTile
=
HoPerTile
+
Y
-
1
;
std
::
size_t
WiPerTile
=
WoPerTile
+
X
-
1
;
std
::
size_t
WiPerTile
=
WoPerTile
+
X
-
1
;
std
::
size_t
HTile
=
(
H
O
+
HoPerTile
-
1
)
/
HoPerTile
;
std
::
size_t
HTile
=
(
H
o
+
HoPerTile
-
1
)
/
HoPerTile
;
std
::
size_t
WTile
=
(
W
O
+
WoPerTile
-
1
)
/
WoPerTile
;
std
::
size_t
WTile
=
(
W
o
+
WoPerTile
-
1
)
/
WoPerTile
;
Tensor
<
double
>
in_hold
({
N
,
C
,
HTile
,
WTile
,
HiPerTile
,
WiPerTile
});
Tensor
<
double
>
in_hold
({
N
,
C
,
HTile
,
WTile
,
HiPerTile
,
WiPerTile
});
Tensor
<
double
>
in_transform
({
N
,
C
,
HTile
,
WTile
,
HiPerTile
,
WiPerTile
});
Tensor
<
double
>
in_transform
({
N
,
C
,
HTile
,
WTile
,
HiPerTile
,
WiPerTile
});
...
...
host/host_tensor/include/host_tensor_generator.hpp
View file @
56fc0842
...
@@ -9,7 +9,7 @@ struct GeneratorTensor_1
...
@@ -9,7 +9,7 @@ struct GeneratorTensor_1
int
value
=
1
;
int
value
=
1
;
template
<
typename
...
Is
>
template
<
typename
...
Is
>
float
operator
()(
Is
...
is
)
float
operator
()(
Is
...)
{
{
return
value
;
return
value
;
}
}
...
...
host/solver/include/conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw.hpp
View file @
56fc0842
...
@@ -99,40 +99,48 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
...
@@ -99,40 +99,48 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
// clang-format on
// clang-format on
}
}
ck
::
DataTypeEnum_t
ABDataTypeEnum
;
ck
::
DataTypeEnum_t
ABDataTypeEnum
=
ck
::
DataTypeEnum_t
::
Unknown
;
ck
::
DataTypeEnum_t
AccDataTypeEnum
;
ck
::
DataTypeEnum_t
AccDataTypeEnum
=
ck
::
DataTypeEnum_t
::
Unknown
;
ck
::
DataTypeEnum_t
CDataTypeEnum
;
ck
::
DataTypeEnum_t
CDataTypeEnum
=
ck
::
DataTypeEnum_t
::
Unknown
;
int
BlockSize
;
int
BlockSize
=
1
;
int
GN0
;
int
GN0
=
-
1
;
int
GK1
;
int
GK1
=
-
1
;
int
GM1PerBlockGM11
;
int
GM1PerBlockGM11
=
-
1
;
int
GN1PerBlockGN11
;
int
GN1PerBlockGN11
=
-
1
;
int
GK0PerBlock
;
int
GK0PerBlock
=
-
1
;
int
BM1PerThreadBM11
;
int
BM1PerThreadBM11
=
-
1
;
int
BN1PerThreadBN11
;
int
BN1PerThreadBN11
=
-
1
;
int
BK0PerThread
;
int
BK0PerThread
=
-
1
;
std
::
array
<
int
,
2
>
BM10BN10ThreadClusterBM10Xs
;
std
::
array
<
int
,
2
>
BM10BN10ThreadClusterBM10Xs
=
{
-
1
,
-
1
};
std
::
array
<
int
,
2
>
BM10BN10ThreadClusterBN10Xs
;
std
::
array
<
int
,
2
>
BM10BN10ThreadClusterBN10Xs
=
{
-
1
,
-
1
};
std
::
array
<
int
,
5
>
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1
;
std
::
array
<
int
,
5
>
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1
=
{
std
::
array
<
int
,
5
>
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1
;
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
array
<
int
,
5
>
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1
;
std
::
array
<
int
,
5
>
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1
=
{
std
::
array
<
int
,
5
>
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1
;
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
array
<
int
,
5
>
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1
=
{
std
::
array
<
int
,
5
>
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1
;
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
array
<
int
,
5
>
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1
;
std
::
array
<
int
,
5
>
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1
=
{
std
::
array
<
int
,
5
>
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1
;
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
array
<
int
,
5
>
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1
;
std
::
array
<
int
,
5
>
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1
=
{
int
CThreadTransferDstScalarPerVector
;
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
array
<
int
,
5
>
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1
=
{
bool
HasMainKBlockLoop
;
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
bool
HasDoubleTailKBlockLoop
;
std
::
array
<
int
,
5
>
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1
=
{
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
array
<
int
,
5
>
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1
=
{
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
int
CThreadTransferDstScalarPerVector
=
-
1
;
bool
HasMainKBlockLoop
=
false
;
bool
HasDoubleTailKBlockLoop
=
false
;
};
};
struct
TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw
struct
TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment