Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
1eafc9c1
"...resnet50_tensorflow.git" did not exist on "f89dff64a2a6a34ba11507a446885571ecaee513"
Commit
1eafc9c1
authored
Nov 28, 2018
by
Chao Liu
Browse files
refactor
parent
fee92fb6
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
22 deletions
+27
-22
driver/conv.cu
driver/conv.cu
+14
-6
driver/device_direct_convolution_1.cuh
driver/device_direct_convolution_1.cuh
+4
-4
src/include/gridwise_winograd_convolution.cuh
src/include/gridwise_winograd_convolution.cuh
+6
-12
src/include/threadwise_tensor_op.cuh
src/include/threadwise_tensor_op.cuh
+3
-0
No files found.
driver/conv.cu
View file @
1eafc9c1
...
@@ -302,7 +302,7 @@ template <class T>
...
@@ -302,7 +302,7 @@ template <class T>
void
check_error
(
const
Tensor
<
T
>&
ref
,
const
Tensor
<
T
>&
result
)
void
check_error
(
const
Tensor
<
T
>&
ref
,
const
Tensor
<
T
>&
result
)
{
{
float
error
=
0
;
float
error
=
0
;
float
max_diff
=
0
;
float
max_diff
=
-
1
;
float
ref_value
=
0
,
result_value
=
0
;
float
ref_value
=
0
,
result_value
=
0
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
{
{
...
@@ -338,6 +338,14 @@ int main()
...
@@ -338,6 +338,14 @@ int main()
constexpr
unsigned
K
=
64
;
constexpr
unsigned
K
=
64
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
constexpr
unsigned
N
=
72
;
constexpr
unsigned
C
=
288
;
constexpr
unsigned
HI
=
38
;
constexpr
unsigned
WI
=
38
;
constexpr
unsigned
K
=
72
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
#elif 0
constexpr
unsigned
N
=
1
;
constexpr
unsigned
N
=
1
;
constexpr
unsigned
C
=
1
;
constexpr
unsigned
C
=
1
;
...
...
driver/device_direct_convolution_1.cuh
View file @
1eafc9c1
...
@@ -26,13 +26,13 @@ void device_direct_convolution_1(
...
@@ -26,13 +26,13 @@ void device_direct_convolution_1(
constexpr
auto
out_desc
=
OutDesc
{};
constexpr
auto
out_desc
=
OutDesc
{};
constexpr
unsigned
OutTileSizeH
=
2
;
constexpr
unsigned
OutTileSizeH
=
2
;
constexpr
unsigned
OutTileSizeW
=
2
;
constexpr
unsigned
OutTileSizeW
=
2
;
constexpr
unsigned
NPerBlock
=
1
;
constexpr
unsigned
NPerBlock
=
2
;
constexpr
unsigned
KPerBlock
=
16
;
constexpr
unsigned
KPerBlock
=
16
;
constexpr
unsigned
CPerBlock
=
4
;
constexpr
unsigned
CPerBlock
=
2
;
constexpr
unsigned
YPerBlock
=
4
;
constexpr
unsigned
YPerBlock
=
2
;
constexpr
unsigned
XPerBlock
=
16
;
constexpr
unsigned
XPerBlock
=
16
;
constexpr
unsigned
NPerThread
=
1
;
constexpr
unsigned
NPerThread
=
2
;
constexpr
unsigned
KPerThread
=
4
;
constexpr
unsigned
KPerThread
=
4
;
constexpr
unsigned
CPerThread
=
2
;
constexpr
unsigned
CPerThread
=
2
;
...
...
src/include/gridwise_winograd_convolution.cuh
View file @
1eafc9c1
...
@@ -41,8 +41,8 @@ __global__ void gridwise_winograd_convolution(InGlobalDesc,
...
@@ -41,8 +41,8 @@ __global__ void gridwise_winograd_convolution(InGlobalDesc,
constexpr
unsigned
HoPerBlock
=
OutTileSizeH
*
YPerBlock
;
constexpr
unsigned
HoPerBlock
=
OutTileSizeH
*
YPerBlock
;
constexpr
unsigned
WoPerBlock
=
OutTileSizeW
*
XPerBlock
;
constexpr
unsigned
WoPerBlock
=
OutTileSizeW
*
XPerBlock
;
constexpr
unsigned
HiPerBlock
=
Y
PerBlock
*
OutTileSizeH
+
S
-
1
;
constexpr
unsigned
HiPerBlock
=
Ho
PerBlock
+
S
-
1
;
constexpr
unsigned
WiPerBlock
=
X
PerBlock
*
OutTileSizeW
+
R
-
1
;
constexpr
unsigned
WiPerBlock
=
Wo
PerBlock
+
R
-
1
;
constexpr
unsigned
InTileSizeH
=
OutTileSizeH
+
S
-
1
;
constexpr
unsigned
InTileSizeH
=
OutTileSizeH
+
S
-
1
;
constexpr
unsigned
InTileSizeW
=
OutTileSizeW
+
R
-
1
;
constexpr
unsigned
InTileSizeW
=
OutTileSizeW
+
R
-
1
;
...
@@ -102,11 +102,8 @@ __global__ void gridwise_winograd_convolution(InGlobalDesc,
...
@@ -102,11 +102,8 @@ __global__ void gridwise_winograd_convolution(InGlobalDesc,
constexpr
auto
wei_transform_block_desc
=
constexpr
auto
wei_transform_block_desc
=
make_ConstantTensorDescriptor
(
Sequence
<
KPerBlock
,
CPerBlock
,
InTileSizeH
,
InTileSizeW
>
{});
make_ConstantTensorDescriptor
(
Sequence
<
KPerBlock
,
CPerBlock
,
InTileSizeH
,
InTileSizeW
>
{});
constexpr
unsigned
in_transform_block_size
=
in_transform_block_desc
.
GetElementSpace
();
__shared__
TFloat
p_in_transform_block
[
in_transform_block_desc
.
GetElementSpace
()];
constexpr
unsigned
wei_transform_block_size
=
wei_transform_block_desc
.
GetElementSpace
();
__shared__
TFloat
p_wei_transform_block
[
wei_transform_block_desc
.
GetElementSpace
()];
__shared__
TFloat
p_in_transform_block
[
in_transform_block_size
];
__shared__
TFloat
p_wei_transform_block
[
wei_transform_block_size
];
// thread data
// thread data
constexpr
auto
in_transform_thread_block_desc
=
constexpr
auto
in_transform_thread_block_desc
=
...
@@ -126,11 +123,8 @@ __global__ void gridwise_winograd_convolution(InGlobalDesc,
...
@@ -126,11 +123,8 @@ __global__ void gridwise_winograd_convolution(InGlobalDesc,
constexpr
auto
out_thread_global_desc
=
constexpr
auto
out_thread_global_desc
=
make_ConstantTensorDescriptor
(
out_thread_desc
.
GetLengths
(),
out_global_desc
.
GetStrides
());
make_ConstantTensorDescriptor
(
out_thread_desc
.
GetLengths
(),
out_global_desc
.
GetStrides
());
constexpr
unsigned
out_transform_thread_size
=
out_transform_thread_desc
.
GetElementSpace
();
TFloat
p_out_transform_thread
[
out_transform_thread_desc
.
GetElementSpace
()];
constexpr
unsigned
out_thread_size
=
out_thread_desc
.
GetElementSpace
();
TFloat
p_out_thread
[
out_thread_desc
.
GetElementSpace
()];
TFloat
p_out_transform_thread
[
out_transform_thread_size
];
TFloat
p_out_thread
[
out_thread_size
];
#if 0
#if 0
if(blockIdx.x == 0 && threadIdx.x == 0)
if(blockIdx.x == 0 && threadIdx.x == 0)
...
...
src/include/threadwise_tensor_op.cuh
View file @
1eafc9c1
...
@@ -116,10 +116,13 @@ __device__ void threadwise_4d_tensor_shift_down(Desc, TFloat* __restrict__ p, ID
...
@@ -116,10 +116,13 @@ __device__ void threadwise_4d_tensor_shift_down(Desc, TFloat* __restrict__ p, ID
const
unsigned
did0_end
=
const
unsigned
did0_end
=
is_same
<
decltype
(
I0
),
IDim
>::
value
?
desc
.
GetLength
(
I0
)
-
shift
:
desc
.
GetLength
(
I0
);
is_same
<
decltype
(
I0
),
IDim
>::
value
?
desc
.
GetLength
(
I0
)
-
shift
:
desc
.
GetLength
(
I0
);
const
unsigned
did1_end
=
const
unsigned
did1_end
=
is_same
<
decltype
(
I1
),
IDim
>::
value
?
desc
.
GetLength
(
I1
)
-
shift
:
desc
.
GetLength
(
I1
);
is_same
<
decltype
(
I1
),
IDim
>::
value
?
desc
.
GetLength
(
I1
)
-
shift
:
desc
.
GetLength
(
I1
);
const
unsigned
did2_end
=
const
unsigned
did2_end
=
is_same
<
decltype
(
I2
),
IDim
>::
value
?
desc
.
GetLength
(
I2
)
-
shift
:
desc
.
GetLength
(
I2
);
is_same
<
decltype
(
I2
),
IDim
>::
value
?
desc
.
GetLength
(
I2
)
-
shift
:
desc
.
GetLength
(
I2
);
const
unsigned
did3_end
=
const
unsigned
did3_end
=
is_same
<
decltype
(
I3
),
IDim
>::
value
?
desc
.
GetLength
(
I3
)
-
shift
:
desc
.
GetLength
(
I3
);
is_same
<
decltype
(
I3
),
IDim
>::
value
?
desc
.
GetLength
(
I3
)
-
shift
:
desc
.
GetLength
(
I3
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment